sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5import itertools 6from collections import defaultdict 7 8from sqlglot import exp 9from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 10from sqlglot.helper import apply_index_offset, ensure_list, seq_get 11from sqlglot.time import format_time 12from sqlglot.tokens import Token, Tokenizer, TokenType 13from sqlglot.trie import TrieResult, in_trie, new_trie 14 15if t.TYPE_CHECKING: 16 from sqlglot._typing import E, Lit 17 from sqlglot.dialects.dialect import Dialect, DialectType 18 19 T = t.TypeVar("T") 20 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) 21 22logger = logging.getLogger("sqlglot") 23 24OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 25 26 27def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 28 if len(args) == 1 and args[0].is_star: 29 return exp.StarMap(this=args[0]) 30 31 keys = [] 32 values = [] 33 for i in range(0, len(args), 2): 34 keys.append(args[i]) 35 values.append(args[i + 1]) 36 37 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 38 39 40def build_like(args: t.List) -> exp.Escape | exp.Like: 41 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 42 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 43 44 45def binary_range_parser( 46 expr_type: t.Type[exp.Expression], reverse_args: bool = False 47) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 48 def _parse_binary_range( 49 self: Parser, this: t.Optional[exp.Expression] 50 ) -> t.Optional[exp.Expression]: 51 expression = self._parse_bitwise() 52 if reverse_args: 53 this, expression = expression, this 54 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 55 56 return _parse_binary_range 57 58 59def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 60 # Default argument order is 
base, expression 61 this = seq_get(args, 0) 62 expression = seq_get(args, 1) 63 64 if expression: 65 if not dialect.LOG_BASE_FIRST: 66 this, expression = expression, this 67 return exp.Log(this=this, expression=expression) 68 69 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 70 71 72def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 73 arg = seq_get(args, 0) 74 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 75 76 77def build_lower(args: t.List) -> exp.Lower | exp.Hex: 78 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 79 arg = seq_get(args, 0) 80 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 81 82 83def build_upper(args: t.List) -> exp.Upper | exp.Hex: 84 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 85 arg = seq_get(args, 0) 86 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 87 88 89def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 90 def _builder(args: t.List, dialect: Dialect) -> E: 91 expression = expr_type( 92 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 93 ) 94 if len(args) > 2 and expr_type is exp.JSONExtract: 95 expression.set("expressions", args[2:]) 96 97 return expression 98 99 return _builder 100 101 102def build_mod(args: t.List) -> exp.Mod: 103 this = seq_get(args, 0) 104 expression = seq_get(args, 1) 105 106 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 107 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 108 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 109 110 return exp.Mod(this=this, expression=expression) 111 112 113def build_pad(args: t.List, is_left: bool = True): 114 return exp.Pad( 115 this=seq_get(args, 0), 116 expression=seq_get(args, 1), 117 fill_pattern=seq_get(args, 2), 118 is_left=is_left, 119 ) 120 121 122def build_array_constructor( 123 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 124) -> exp.Expression: 125 array_exp = exp_class(expressions=args) 126 127 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 128 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 129 130 return array_exp 131 132 133def build_convert_timezone( 134 args: t.List, default_source_tz: t.Optional[str] = None 135) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 136 if len(args) == 2: 137 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 138 return exp.ConvertTimezone( 139 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 140 ) 141 142 return exp.ConvertTimezone.from_arg_list(args) 143 144 145def build_trim(args: t.List, is_left: bool = True): 146 return exp.Trim( 147 this=seq_get(args, 0), 148 expression=seq_get(args, 1), 149 position="LEADING" if is_left else "TRAILING", 150 ) 151 152 153def build_coalesce( 154 args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None 155) -> exp.Coalesce: 156 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null) 157 158 159def build_locate_strposition(args: t.List): 160 return exp.StrPosition( 161 this=seq_get(args, 1), 162 substr=seq_get(args, 0), 163 position=seq_get(args, 2), 164 ) 165 166 167class _Parser(type): 168 def __new__(cls, clsname, bases, attrs): 169 klass = super().__new__(cls, clsname, 
bases, attrs) 170 171 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 172 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 173 174 return klass 175 176 177class Parser(metaclass=_Parser): 178 """ 179 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 180 181 Args: 182 error_level: The desired error level. 183 Default: ErrorLevel.IMMEDIATE 184 error_message_context: The amount of context to capture from a query string when displaying 185 the error message (in number of characters). 186 Default: 100 187 max_errors: Maximum number of error messages to include in a raised ParseError. 188 This is only relevant if error_level is ErrorLevel.RAISE. 189 Default: 3 190 """ 191 192 FUNCTIONS: t.Dict[str, t.Callable] = { 193 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 194 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 195 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 196 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 197 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 198 ), 199 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 200 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 201 ), 202 "CHAR": lambda args: exp.Chr(expressions=args), 203 "CHR": lambda args: exp.Chr(expressions=args), 204 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 205 "CONCAT": lambda args, dialect: exp.Concat( 206 expressions=args, 207 safe=not dialect.STRICT_STRING_CONCAT, 208 coalesce=dialect.CONCAT_COALESCE, 209 ), 210 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONVERT_TIMEZONE": build_convert_timezone, 216 "DATE_TO_DATE_STR": lambda args: exp.Cast( 217 this=seq_get(args, 0), 218 
to=exp.DataType(this=exp.DataType.Type.TEXT), 219 ), 220 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 221 start=seq_get(args, 0), 222 end=seq_get(args, 1), 223 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 224 ), 225 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 226 "HEX": build_hex, 227 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 228 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 229 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 230 "LIKE": build_like, 231 "LOG": build_logarithm, 232 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 233 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 234 "LOWER": build_lower, 235 "LPAD": lambda args: build_pad(args), 236 "LEFTPAD": lambda args: build_pad(args), 237 "LTRIM": lambda args: build_trim(args), 238 "MOD": build_mod, 239 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 240 "RPAD": lambda args: build_pad(args, is_left=False), 241 "RTRIM": lambda args: build_trim(args, is_left=False), 242 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 243 if len(args) != 2 244 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 245 "STRPOS": exp.StrPosition.from_arg_list, 246 "CHARINDEX": lambda args: build_locate_strposition(args), 247 "INSTR": exp.StrPosition.from_arg_list, 248 "LOCATE": lambda args: build_locate_strposition(args), 249 "TIME_TO_TIME_STR": lambda args: exp.Cast( 250 this=seq_get(args, 0), 251 to=exp.DataType(this=exp.DataType.Type.TEXT), 252 ), 253 "TO_HEX": build_hex, 254 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 255 this=exp.Cast( 256 this=seq_get(args, 0), 257 to=exp.DataType(this=exp.DataType.Type.TEXT), 258 ), 259 start=exp.Literal.number(1), 260 length=exp.Literal.number(10), 261 ), 262 
"UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 263 "UPPER": build_upper, 264 "VAR_MAP": build_var_map, 265 } 266 267 NO_PAREN_FUNCTIONS = { 268 TokenType.CURRENT_DATE: exp.CurrentDate, 269 TokenType.CURRENT_DATETIME: exp.CurrentDate, 270 TokenType.CURRENT_TIME: exp.CurrentTime, 271 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 272 TokenType.CURRENT_USER: exp.CurrentUser, 273 } 274 275 STRUCT_TYPE_TOKENS = { 276 TokenType.NESTED, 277 TokenType.OBJECT, 278 TokenType.STRUCT, 279 TokenType.UNION, 280 } 281 282 NESTED_TYPE_TOKENS = { 283 TokenType.ARRAY, 284 TokenType.LIST, 285 TokenType.LOWCARDINALITY, 286 TokenType.MAP, 287 TokenType.NULLABLE, 288 TokenType.RANGE, 289 *STRUCT_TYPE_TOKENS, 290 } 291 292 ENUM_TYPE_TOKENS = { 293 TokenType.DYNAMIC, 294 TokenType.ENUM, 295 TokenType.ENUM8, 296 TokenType.ENUM16, 297 } 298 299 AGGREGATE_TYPE_TOKENS = { 300 TokenType.AGGREGATEFUNCTION, 301 TokenType.SIMPLEAGGREGATEFUNCTION, 302 } 303 304 TYPE_TOKENS = { 305 TokenType.BIT, 306 TokenType.BOOLEAN, 307 TokenType.TINYINT, 308 TokenType.UTINYINT, 309 TokenType.SMALLINT, 310 TokenType.USMALLINT, 311 TokenType.INT, 312 TokenType.UINT, 313 TokenType.BIGINT, 314 TokenType.UBIGINT, 315 TokenType.INT128, 316 TokenType.UINT128, 317 TokenType.INT256, 318 TokenType.UINT256, 319 TokenType.MEDIUMINT, 320 TokenType.UMEDIUMINT, 321 TokenType.FIXEDSTRING, 322 TokenType.FLOAT, 323 TokenType.DOUBLE, 324 TokenType.UDOUBLE, 325 TokenType.CHAR, 326 TokenType.NCHAR, 327 TokenType.VARCHAR, 328 TokenType.NVARCHAR, 329 TokenType.BPCHAR, 330 TokenType.TEXT, 331 TokenType.MEDIUMTEXT, 332 TokenType.LONGTEXT, 333 TokenType.BLOB, 334 TokenType.MEDIUMBLOB, 335 TokenType.LONGBLOB, 336 TokenType.BINARY, 337 TokenType.VARBINARY, 338 TokenType.JSON, 339 TokenType.JSONB, 340 TokenType.INTERVAL, 341 TokenType.TINYBLOB, 342 TokenType.TINYTEXT, 343 TokenType.TIME, 344 TokenType.TIMETZ, 345 TokenType.TIMESTAMP, 346 TokenType.TIMESTAMP_S, 347 TokenType.TIMESTAMP_MS, 348 
TokenType.TIMESTAMP_NS, 349 TokenType.TIMESTAMPTZ, 350 TokenType.TIMESTAMPLTZ, 351 TokenType.TIMESTAMPNTZ, 352 TokenType.DATETIME, 353 TokenType.DATETIME2, 354 TokenType.DATETIME64, 355 TokenType.SMALLDATETIME, 356 TokenType.DATE, 357 TokenType.DATE32, 358 TokenType.INT4RANGE, 359 TokenType.INT4MULTIRANGE, 360 TokenType.INT8RANGE, 361 TokenType.INT8MULTIRANGE, 362 TokenType.NUMRANGE, 363 TokenType.NUMMULTIRANGE, 364 TokenType.TSRANGE, 365 TokenType.TSMULTIRANGE, 366 TokenType.TSTZRANGE, 367 TokenType.TSTZMULTIRANGE, 368 TokenType.DATERANGE, 369 TokenType.DATEMULTIRANGE, 370 TokenType.DECIMAL, 371 TokenType.DECIMAL32, 372 TokenType.DECIMAL64, 373 TokenType.DECIMAL128, 374 TokenType.DECIMAL256, 375 TokenType.UDECIMAL, 376 TokenType.BIGDECIMAL, 377 TokenType.UUID, 378 TokenType.GEOGRAPHY, 379 TokenType.GEOMETRY, 380 TokenType.POINT, 381 TokenType.RING, 382 TokenType.LINESTRING, 383 TokenType.MULTILINESTRING, 384 TokenType.POLYGON, 385 TokenType.MULTIPOLYGON, 386 TokenType.HLLSKETCH, 387 TokenType.HSTORE, 388 TokenType.PSEUDO_TYPE, 389 TokenType.SUPER, 390 TokenType.SERIAL, 391 TokenType.SMALLSERIAL, 392 TokenType.BIGSERIAL, 393 TokenType.XML, 394 TokenType.YEAR, 395 TokenType.USERDEFINED, 396 TokenType.MONEY, 397 TokenType.SMALLMONEY, 398 TokenType.ROWVERSION, 399 TokenType.IMAGE, 400 TokenType.VARIANT, 401 TokenType.VECTOR, 402 TokenType.VOID, 403 TokenType.OBJECT, 404 TokenType.OBJECT_IDENTIFIER, 405 TokenType.INET, 406 TokenType.IPADDRESS, 407 TokenType.IPPREFIX, 408 TokenType.IPV4, 409 TokenType.IPV6, 410 TokenType.UNKNOWN, 411 TokenType.NOTHING, 412 TokenType.NULL, 413 TokenType.NAME, 414 TokenType.TDIGEST, 415 TokenType.DYNAMIC, 416 *ENUM_TYPE_TOKENS, 417 *NESTED_TYPE_TOKENS, 418 *AGGREGATE_TYPE_TOKENS, 419 } 420 421 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 422 TokenType.BIGINT: TokenType.UBIGINT, 423 TokenType.INT: TokenType.UINT, 424 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 425 TokenType.SMALLINT: TokenType.USMALLINT, 426 TokenType.TINYINT: TokenType.UTINYINT, 427 
TokenType.DECIMAL: TokenType.UDECIMAL, 428 TokenType.DOUBLE: TokenType.UDOUBLE, 429 } 430 431 SUBQUERY_PREDICATES = { 432 TokenType.ANY: exp.Any, 433 TokenType.ALL: exp.All, 434 TokenType.EXISTS: exp.Exists, 435 TokenType.SOME: exp.Any, 436 } 437 438 RESERVED_TOKENS = { 439 *Tokenizer.SINGLE_TOKENS.values(), 440 TokenType.SELECT, 441 } - {TokenType.IDENTIFIER} 442 443 DB_CREATABLES = { 444 TokenType.DATABASE, 445 TokenType.DICTIONARY, 446 TokenType.FILE_FORMAT, 447 TokenType.MODEL, 448 TokenType.NAMESPACE, 449 TokenType.SCHEMA, 450 TokenType.SEQUENCE, 451 TokenType.SINK, 452 TokenType.SOURCE, 453 TokenType.STAGE, 454 TokenType.STORAGE_INTEGRATION, 455 TokenType.STREAMLIT, 456 TokenType.TABLE, 457 TokenType.TAG, 458 TokenType.VIEW, 459 TokenType.WAREHOUSE, 460 } 461 462 CREATABLES = { 463 TokenType.COLUMN, 464 TokenType.CONSTRAINT, 465 TokenType.FOREIGN_KEY, 466 TokenType.FUNCTION, 467 TokenType.INDEX, 468 TokenType.PROCEDURE, 469 *DB_CREATABLES, 470 } 471 472 ALTERABLES = { 473 TokenType.INDEX, 474 TokenType.TABLE, 475 TokenType.VIEW, 476 } 477 478 # Tokens that can represent identifiers 479 ID_VAR_TOKENS = { 480 TokenType.ALL, 481 TokenType.ATTACH, 482 TokenType.VAR, 483 TokenType.ANTI, 484 TokenType.APPLY, 485 TokenType.ASC, 486 TokenType.ASOF, 487 TokenType.AUTO_INCREMENT, 488 TokenType.BEGIN, 489 TokenType.BPCHAR, 490 TokenType.CACHE, 491 TokenType.CASE, 492 TokenType.COLLATE, 493 TokenType.COMMAND, 494 TokenType.COMMENT, 495 TokenType.COMMIT, 496 TokenType.CONSTRAINT, 497 TokenType.COPY, 498 TokenType.CUBE, 499 TokenType.CURRENT_SCHEMA, 500 TokenType.DEFAULT, 501 TokenType.DELETE, 502 TokenType.DESC, 503 TokenType.DESCRIBE, 504 TokenType.DETACH, 505 TokenType.DICTIONARY, 506 TokenType.DIV, 507 TokenType.END, 508 TokenType.EXECUTE, 509 TokenType.EXPORT, 510 TokenType.ESCAPE, 511 TokenType.FALSE, 512 TokenType.FIRST, 513 TokenType.FILTER, 514 TokenType.FINAL, 515 TokenType.FORMAT, 516 TokenType.FULL, 517 TokenType.GET, 518 TokenType.IDENTIFIER, 519 TokenType.IS, 
520 TokenType.ISNULL, 521 TokenType.INTERVAL, 522 TokenType.KEEP, 523 TokenType.KILL, 524 TokenType.LEFT, 525 TokenType.LIMIT, 526 TokenType.LOAD, 527 TokenType.MERGE, 528 TokenType.NATURAL, 529 TokenType.NEXT, 530 TokenType.OFFSET, 531 TokenType.OPERATOR, 532 TokenType.ORDINALITY, 533 TokenType.OVERLAPS, 534 TokenType.OVERWRITE, 535 TokenType.PARTITION, 536 TokenType.PERCENT, 537 TokenType.PIVOT, 538 TokenType.PRAGMA, 539 TokenType.PUT, 540 TokenType.RANGE, 541 TokenType.RECURSIVE, 542 TokenType.REFERENCES, 543 TokenType.REFRESH, 544 TokenType.RENAME, 545 TokenType.REPLACE, 546 TokenType.RIGHT, 547 TokenType.ROLLUP, 548 TokenType.ROW, 549 TokenType.ROWS, 550 TokenType.SEMI, 551 TokenType.SET, 552 TokenType.SETTINGS, 553 TokenType.SHOW, 554 TokenType.TEMPORARY, 555 TokenType.TOP, 556 TokenType.TRUE, 557 TokenType.TRUNCATE, 558 TokenType.UNIQUE, 559 TokenType.UNNEST, 560 TokenType.UNPIVOT, 561 TokenType.UPDATE, 562 TokenType.USE, 563 TokenType.VOLATILE, 564 TokenType.WINDOW, 565 *CREATABLES, 566 *SUBQUERY_PREDICATES, 567 *TYPE_TOKENS, 568 *NO_PAREN_FUNCTIONS, 569 } 570 ID_VAR_TOKENS.remove(TokenType.UNION) 571 572 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 573 TokenType.ANTI, 574 TokenType.APPLY, 575 TokenType.ASOF, 576 TokenType.FULL, 577 TokenType.LEFT, 578 TokenType.LOCK, 579 TokenType.NATURAL, 580 TokenType.RIGHT, 581 TokenType.SEMI, 582 TokenType.WINDOW, 583 } 584 585 ALIAS_TOKENS = ID_VAR_TOKENS 586 587 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 588 589 ARRAY_CONSTRUCTORS = { 590 "ARRAY": exp.Array, 591 "LIST": exp.List, 592 } 593 594 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 595 596 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 597 598 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 599 600 FUNC_TOKENS = { 601 TokenType.COLLATE, 602 TokenType.COMMAND, 603 TokenType.CURRENT_DATE, 604 TokenType.CURRENT_DATETIME, 605 TokenType.CURRENT_SCHEMA, 606 TokenType.CURRENT_TIMESTAMP, 607 TokenType.CURRENT_TIME, 608 TokenType.CURRENT_USER, 609 
TokenType.FILTER, 610 TokenType.FIRST, 611 TokenType.FORMAT, 612 TokenType.GET, 613 TokenType.GLOB, 614 TokenType.IDENTIFIER, 615 TokenType.INDEX, 616 TokenType.ISNULL, 617 TokenType.ILIKE, 618 TokenType.INSERT, 619 TokenType.LIKE, 620 TokenType.MERGE, 621 TokenType.NEXT, 622 TokenType.OFFSET, 623 TokenType.PRIMARY_KEY, 624 TokenType.RANGE, 625 TokenType.REPLACE, 626 TokenType.RLIKE, 627 TokenType.ROW, 628 TokenType.UNNEST, 629 TokenType.VAR, 630 TokenType.LEFT, 631 TokenType.RIGHT, 632 TokenType.SEQUENCE, 633 TokenType.DATE, 634 TokenType.DATETIME, 635 TokenType.TABLE, 636 TokenType.TIMESTAMP, 637 TokenType.TIMESTAMPTZ, 638 TokenType.TRUNCATE, 639 TokenType.WINDOW, 640 TokenType.XOR, 641 *TYPE_TOKENS, 642 *SUBQUERY_PREDICATES, 643 } 644 645 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 646 TokenType.AND: exp.And, 647 } 648 649 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 650 TokenType.COLON_EQ: exp.PropertyEQ, 651 } 652 653 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 654 TokenType.OR: exp.Or, 655 } 656 657 EQUALITY = { 658 TokenType.EQ: exp.EQ, 659 TokenType.NEQ: exp.NEQ, 660 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 661 } 662 663 COMPARISON = { 664 TokenType.GT: exp.GT, 665 TokenType.GTE: exp.GTE, 666 TokenType.LT: exp.LT, 667 TokenType.LTE: exp.LTE, 668 } 669 670 BITWISE = { 671 TokenType.AMP: exp.BitwiseAnd, 672 TokenType.CARET: exp.BitwiseXor, 673 TokenType.PIPE: exp.BitwiseOr, 674 } 675 676 TERM = { 677 TokenType.DASH: exp.Sub, 678 TokenType.PLUS: exp.Add, 679 TokenType.MOD: exp.Mod, 680 TokenType.COLLATE: exp.Collate, 681 } 682 683 FACTOR = { 684 TokenType.DIV: exp.IntDiv, 685 TokenType.LR_ARROW: exp.Distance, 686 TokenType.SLASH: exp.Div, 687 TokenType.STAR: exp.Mul, 688 } 689 690 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 691 692 TIMES = { 693 TokenType.TIME, 694 TokenType.TIMETZ, 695 } 696 697 TIMESTAMPS = { 698 TokenType.TIMESTAMP, 699 TokenType.TIMESTAMPNTZ, 700 TokenType.TIMESTAMPTZ, 701 
TokenType.TIMESTAMPLTZ, 702 *TIMES, 703 } 704 705 SET_OPERATIONS = { 706 TokenType.UNION, 707 TokenType.INTERSECT, 708 TokenType.EXCEPT, 709 } 710 711 JOIN_METHODS = { 712 TokenType.ASOF, 713 TokenType.NATURAL, 714 TokenType.POSITIONAL, 715 } 716 717 JOIN_SIDES = { 718 TokenType.LEFT, 719 TokenType.RIGHT, 720 TokenType.FULL, 721 } 722 723 JOIN_KINDS = { 724 TokenType.ANTI, 725 TokenType.CROSS, 726 TokenType.INNER, 727 TokenType.OUTER, 728 TokenType.SEMI, 729 TokenType.STRAIGHT_JOIN, 730 } 731 732 JOIN_HINTS: t.Set[str] = set() 733 734 LAMBDAS = { 735 TokenType.ARROW: lambda self, expressions: self.expression( 736 exp.Lambda, 737 this=self._replace_lambda( 738 self._parse_assignment(), 739 expressions, 740 ), 741 expressions=expressions, 742 ), 743 TokenType.FARROW: lambda self, expressions: self.expression( 744 exp.Kwarg, 745 this=exp.var(expressions[0].name), 746 expression=self._parse_assignment(), 747 ), 748 } 749 750 COLUMN_OPERATORS = { 751 TokenType.DOT: None, 752 TokenType.DOTCOLON: lambda self, this, to: self.expression( 753 exp.JSONCast, 754 this=this, 755 to=to, 756 ), 757 TokenType.DCOLON: lambda self, this, to: self.expression( 758 exp.Cast if self.STRICT_CAST else exp.TryCast, 759 this=this, 760 to=to, 761 ), 762 TokenType.ARROW: lambda self, this, path: self.expression( 763 exp.JSONExtract, 764 this=this, 765 expression=self.dialect.to_json_path(path), 766 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 767 ), 768 TokenType.DARROW: lambda self, this, path: self.expression( 769 exp.JSONExtractScalar, 770 this=this, 771 expression=self.dialect.to_json_path(path), 772 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 773 ), 774 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 775 exp.JSONBExtract, 776 this=this, 777 expression=path, 778 ), 779 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 780 exp.JSONBExtractScalar, 781 this=this, 782 expression=path, 783 ), 784 TokenType.PLACEHOLDER: lambda self, this, key: 
self.expression( 785 exp.JSONBContains, 786 this=this, 787 expression=key, 788 ), 789 } 790 791 EXPRESSION_PARSERS = { 792 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 793 exp.Column: lambda self: self._parse_column(), 794 exp.Condition: lambda self: self._parse_assignment(), 795 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 796 exp.Expression: lambda self: self._parse_expression(), 797 exp.From: lambda self: self._parse_from(joins=True), 798 exp.Group: lambda self: self._parse_group(), 799 exp.Having: lambda self: self._parse_having(), 800 exp.Hint: lambda self: self._parse_hint_body(), 801 exp.Identifier: lambda self: self._parse_id_var(), 802 exp.Join: lambda self: self._parse_join(), 803 exp.Lambda: lambda self: self._parse_lambda(), 804 exp.Lateral: lambda self: self._parse_lateral(), 805 exp.Limit: lambda self: self._parse_limit(), 806 exp.Offset: lambda self: self._parse_offset(), 807 exp.Order: lambda self: self._parse_order(), 808 exp.Ordered: lambda self: self._parse_ordered(), 809 exp.Properties: lambda self: self._parse_properties(), 810 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 811 exp.Qualify: lambda self: self._parse_qualify(), 812 exp.Returning: lambda self: self._parse_returning(), 813 exp.Select: lambda self: self._parse_select(), 814 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 815 exp.Table: lambda self: self._parse_table_parts(), 816 exp.TableAlias: lambda self: self._parse_table_alias(), 817 exp.Tuple: lambda self: self._parse_value(values=False), 818 exp.Whens: lambda self: self._parse_when_matched(), 819 exp.Where: lambda self: self._parse_where(), 820 exp.Window: lambda self: self._parse_named_window(), 821 exp.With: lambda self: self._parse_with(), 822 "JOIN_TYPE": lambda self: self._parse_join_parts(), 823 } 824 825 STATEMENT_PARSERS = { 826 TokenType.ALTER: lambda self: self._parse_alter(), 827 TokenType.ANALYZE: 
lambda self: self._parse_analyze(), 828 TokenType.BEGIN: lambda self: self._parse_transaction(), 829 TokenType.CACHE: lambda self: self._parse_cache(), 830 TokenType.COMMENT: lambda self: self._parse_comment(), 831 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 832 TokenType.COPY: lambda self: self._parse_copy(), 833 TokenType.CREATE: lambda self: self._parse_create(), 834 TokenType.DELETE: lambda self: self._parse_delete(), 835 TokenType.DESC: lambda self: self._parse_describe(), 836 TokenType.DESCRIBE: lambda self: self._parse_describe(), 837 TokenType.DROP: lambda self: self._parse_drop(), 838 TokenType.GRANT: lambda self: self._parse_grant(), 839 TokenType.INSERT: lambda self: self._parse_insert(), 840 TokenType.KILL: lambda self: self._parse_kill(), 841 TokenType.LOAD: lambda self: self._parse_load(), 842 TokenType.MERGE: lambda self: self._parse_merge(), 843 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 844 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 845 TokenType.REFRESH: lambda self: self._parse_refresh(), 846 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 847 TokenType.SET: lambda self: self._parse_set(), 848 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 849 TokenType.UNCACHE: lambda self: self._parse_uncache(), 850 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 851 TokenType.UPDATE: lambda self: self._parse_update(), 852 TokenType.USE: lambda self: self._parse_use(), 853 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 854 } 855 856 UNARY_PARSERS = { 857 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 858 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 859 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 860 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 861 
TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 862 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 863 } 864 865 STRING_PARSERS = { 866 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 867 exp.RawString, this=token.text 868 ), 869 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 870 exp.National, this=token.text 871 ), 872 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 873 TokenType.STRING: lambda self, token: self.expression( 874 exp.Literal, this=token.text, is_string=True 875 ), 876 TokenType.UNICODE_STRING: lambda self, token: self.expression( 877 exp.UnicodeString, 878 this=token.text, 879 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 880 ), 881 } 882 883 NUMERIC_PARSERS = { 884 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 885 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 886 TokenType.HEX_STRING: lambda self, token: self.expression( 887 exp.HexString, 888 this=token.text, 889 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 890 ), 891 TokenType.NUMBER: lambda self, token: self.expression( 892 exp.Literal, this=token.text, is_string=False 893 ), 894 } 895 896 PRIMARY_PARSERS = { 897 **STRING_PARSERS, 898 **NUMERIC_PARSERS, 899 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 900 TokenType.NULL: lambda self, _: self.expression(exp.Null), 901 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 902 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 903 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 904 TokenType.STAR: lambda self, _: self._parse_star_ops(), 905 } 906 907 PLACEHOLDER_PARSERS = { 908 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 909 TokenType.PARAMETER: 
lambda self: self._parse_parameter(), 910 TokenType.COLON: lambda self: ( 911 self.expression(exp.Placeholder, this=self._prev.text) 912 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 913 else None 914 ), 915 } 916 917 RANGE_PARSERS = { 918 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 919 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 920 TokenType.GLOB: binary_range_parser(exp.Glob), 921 TokenType.ILIKE: binary_range_parser(exp.ILike), 922 TokenType.IN: lambda self, this: self._parse_in(this), 923 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 924 TokenType.IS: lambda self, this: self._parse_is(this), 925 TokenType.LIKE: binary_range_parser(exp.Like), 926 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 927 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 928 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 929 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 930 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 931 } 932 933 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 934 "ALLOWED_VALUES": lambda self: self.expression( 935 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 936 ), 937 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 938 "AUTO": lambda self: self._parse_auto_property(), 939 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 940 "BACKUP": lambda self: self.expression( 941 exp.BackupProperty, this=self._parse_var(any_token=True) 942 ), 943 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 944 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 945 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 946 "CHECKSUM": lambda self: self._parse_checksum(), 947 "CLUSTER BY": lambda self: self._parse_cluster(), 948 "CLUSTERED": lambda self: self._parse_clustered_by(), 949 "COLLATE": lambda 
self, **kwargs: self._parse_property_assignment( 950 exp.CollateProperty, **kwargs 951 ), 952 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 953 "CONTAINS": lambda self: self._parse_contains_property(), 954 "COPY": lambda self: self._parse_copy_property(), 955 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 956 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 957 "DEFINER": lambda self: self._parse_definer(), 958 "DETERMINISTIC": lambda self: self.expression( 959 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 960 ), 961 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 962 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 963 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 964 "DISTKEY": lambda self: self._parse_distkey(), 965 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 966 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 967 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 968 "ENVIRONMENT": lambda self: self.expression( 969 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 970 ), 971 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 972 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 973 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 974 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 975 "FREESPACE": lambda self: self._parse_freespace(), 976 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 977 "HEAP": lambda self: self.expression(exp.HeapProperty), 978 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 979 "IMMUTABLE": lambda self: self.expression( 980 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 981 ), 982 "INHERITS": lambda self: self.expression( 983 
exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 984 ), 985 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 986 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 987 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 988 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 989 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 990 "LIKE": lambda self: self._parse_create_like(), 991 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 992 "LOCK": lambda self: self._parse_locking(), 993 "LOCKING": lambda self: self._parse_locking(), 994 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 995 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 996 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 997 "MODIFIES": lambda self: self._parse_modifies_property(), 998 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 999 "NO": lambda self: self._parse_no_property(), 1000 "ON": lambda self: self._parse_on_property(), 1001 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1002 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1003 "PARTITION": lambda self: self._parse_partitioned_of(), 1004 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1005 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1006 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1007 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1008 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1009 "READS": lambda self: self._parse_reads_property(), 1010 "REMOTE": lambda self: self._parse_remote_with_connection(), 1011 "RETURNS": lambda self: self._parse_returns(), 1012 "STRICT": lambda self: self.expression(exp.StrictProperty), 1013 "STREAMING": lambda self: 
self.expression(exp.StreamingTableProperty), 1014 "ROW": lambda self: self._parse_row(), 1015 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1016 "SAMPLE": lambda self: self.expression( 1017 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1018 ), 1019 "SECURE": lambda self: self.expression(exp.SecureProperty), 1020 "SECURITY": lambda self: self._parse_security(), 1021 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1022 "SETTINGS": lambda self: self._parse_settings_property(), 1023 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1024 "SORTKEY": lambda self: self._parse_sortkey(), 1025 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1026 "STABLE": lambda self: self.expression( 1027 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1028 ), 1029 "STORED": lambda self: self._parse_stored(), 1030 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1031 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1032 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1033 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1034 "TO": lambda self: self._parse_to_table(), 1035 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1036 "TRANSFORM": lambda self: self.expression( 1037 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1038 ), 1039 "TTL": lambda self: self._parse_ttl(), 1040 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1041 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1042 "VOLATILE": lambda self: self._parse_volatile_property(), 1043 "WITH": lambda self: self._parse_with_property(), 1044 } 1045 1046 CONSTRAINT_PARSERS = { 1047 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1048 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1049 "CASESPECIFIC": 
lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1050 "CHARACTER SET": lambda self: self.expression( 1051 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1052 ), 1053 "CHECK": lambda self: self.expression( 1054 exp.CheckColumnConstraint, 1055 this=self._parse_wrapped(self._parse_assignment), 1056 enforced=self._match_text_seq("ENFORCED"), 1057 ), 1058 "COLLATE": lambda self: self.expression( 1059 exp.CollateColumnConstraint, 1060 this=self._parse_identifier() or self._parse_column(), 1061 ), 1062 "COMMENT": lambda self: self.expression( 1063 exp.CommentColumnConstraint, this=self._parse_string() 1064 ), 1065 "COMPRESS": lambda self: self._parse_compress(), 1066 "CLUSTERED": lambda self: self.expression( 1067 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1068 ), 1069 "NONCLUSTERED": lambda self: self.expression( 1070 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1071 ), 1072 "DEFAULT": lambda self: self.expression( 1073 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1074 ), 1075 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1076 "EPHEMERAL": lambda self: self.expression( 1077 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1078 ), 1079 "EXCLUDE": lambda self: self.expression( 1080 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1081 ), 1082 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1083 "FORMAT": lambda self: self.expression( 1084 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1085 ), 1086 "GENERATED": lambda self: self._parse_generated_as_identity(), 1087 "IDENTITY": lambda self: self._parse_auto_increment(), 1088 "INLINE": lambda self: self._parse_inline(), 1089 "LIKE": lambda self: self._parse_create_like(), 1090 "NOT": lambda self: self._parse_not_constraint(), 1091 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, 
def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
    """
    Parses the parenthesized arguments of a `BUCKET(...)` / `TRUNCATE(...)` partition transform.

    The keyword itself has already been consumed; it is read back from the previous token in
    order to pick the expression class to build.

    Returns:
        The partition transform, with the literal argument (if it came first) moved to `expression`.
    """
    if self._prev.text.upper() == "BUCKET":
        transform_cls = exp.PartitionedByBucket
    else:
        transform_cls = exp.PartitionByTruncate

    def _parse_transform_arg() -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_column()

    parsed_args = self._parse_wrapped_csv(_parse_transform_arg)
    first = seq_get(parsed_args, 0)
    second = seq_get(parsed_args, 1)

    # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
    #  - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
    #  - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
    # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
    #
    # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
    # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
    if isinstance(first, exp.Literal):
        first, second = second, first

    return self.expression(transform_cls, this=first, expression=second)
this=self._parse_column() 1180 ), 1181 "IF": lambda self: self._parse_if(), 1182 } 1183 1184 INVALID_FUNC_NAME_TOKENS = { 1185 TokenType.IDENTIFIER, 1186 TokenType.STRING, 1187 } 1188 1189 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1190 1191 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1192 1193 FUNCTION_PARSERS = { 1194 **{ 1195 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1196 }, 1197 **{ 1198 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1199 }, 1200 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1201 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1202 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1203 "DECODE": lambda self: self._parse_decode(), 1204 "EXTRACT": lambda self: self._parse_extract(), 1205 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1206 "GAP_FILL": lambda self: self._parse_gap_fill(), 1207 "JSON_OBJECT": lambda self: self._parse_json_object(), 1208 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1209 "JSON_TABLE": lambda self: self._parse_json_table(), 1210 "MATCH": lambda self: self._parse_match_against(), 1211 "NORMALIZE": lambda self: self._parse_normalize(), 1212 "OPENJSON": lambda self: self._parse_open_json(), 1213 "OVERLAY": lambda self: self._parse_overlay(), 1214 "POSITION": lambda self: self._parse_position(), 1215 "PREDICT": lambda self: self._parse_predict(), 1216 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1217 "STRING_AGG": lambda self: self._parse_string_agg(), 1218 "SUBSTRING": lambda self: self._parse_substring(), 1219 "TRIM": lambda self: self._parse_trim(), 1220 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1221 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1222 "XMLELEMENT": lambda self: self.expression( 1223 exp.XMLElement, 1224 this=self._match_text_seq("NAME") and self._parse_id_var(), 1225 
# Dispatch table for query modifiers. Each entry maps the token type that introduces a
# modifier to a callable that takes the parser and returns a `(name, parsed node)` pair.
# NOTE(review): the name is presumably the arg key under which the node is stored on the
# query - the consumer of this table is outside this excerpt, confirm there.
#
# Several token types deliberately share a name: LIMIT/FETCH -> "limit", FOR/LOCK -> "locks",
# TABLE_SAMPLE/USING -> "sample" and CONNECT_BY/START_WITH -> "connect".
QUERY_MODIFIER_PARSERS = {
    TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
    TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
    TokenType.WHERE: lambda self: ("where", self._parse_where()),
    TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
    TokenType.HAVING: lambda self: ("having", self._parse_having()),
    TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
    TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
    TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
    TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
    TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
    TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
    TokenType.FOR: lambda self: ("locks", self._parse_locks()),
    TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
    TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.CLUSTER_BY: lambda self: (
        "cluster",
        self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
    ),
    TokenType.DISTRIBUTE_BY: lambda self: (
        "distribute",
        self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
    ),
    TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    # CONNECT BY may come before or after START WITH; both spellings feed the same parser
    TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
    TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
}
# Transaction characteristics, keyed by their leading keyword. Each value lists the keyword
# (or keyword sequence) continuations accepted after that keyword.
# NOTE(review): presumably consumed by the SET TRANSACTION parser, which is outside this excerpt.
#
# The isolation level is spelled READ UNCOMMITTED (double "M") in SQL; the previous
# "UNCOMITTED" spelling could never match the real keyword.
TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
    "ISOLATION": (
        ("LEVEL", "REPEATABLE", "READ"),
        ("LEVEL", "READ", "COMMITTED"),
        ("LEVEL", "READ", "UNCOMMITTED"),
        ("LEVEL", "SERIALIZABLE"),
    ),
    "READ": ("WRITE", "ONLY"),
}
**dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1329 } 1330 1331 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1332 1333 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1334 1335 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1336 "NOT": ("ENFORCED",), 1337 "MATCH": ( 1338 "FULL", 1339 "PARTIAL", 1340 "SIMPLE", 1341 ), 1342 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1343 "USING": ( 1344 "BTREE", 1345 "HASH", 1346 ), 1347 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1348 } 1349 1350 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1351 "NO": ("OTHERS",), 1352 "CURRENT": ("ROW",), 1353 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1354 } 1355 1356 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1357 1358 CLONE_KEYWORDS = {"CLONE", "COPY"} 1359 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1360 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1361 1362 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1363 1364 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1365 1366 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1367 1368 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1369 1370 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1371 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1372 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1373 1374 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1375 1376 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1377 1378 ADD_CONSTRAINT_TOKENS = { 1379 TokenType.CONSTRAINT, 1380 TokenType.FOREIGN_KEY, 1381 TokenType.INDEX, 1382 TokenType.KEY, 1383 TokenType.PRIMARY_KEY, 1384 TokenType.UNIQUE, 1385 } 1386 1387 DISTINCT_TOKENS = {TokenType.DISTINCT} 1388 1389 NULL_TOKENS = {TokenType.NULL} 1390 1391 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1392 1393 SELECT_START_TOKENS = 
{TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1394 1395 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1396 1397 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1398 1399 ODBC_DATETIME_LITERALS = { 1400 "d": exp.Date, 1401 "t": exp.Time, 1402 "ts": exp.Timestamp, 1403 } 1404 1405 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1406 1407 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1408 1409 # The style options for the DESCRIBE statement 1410 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1411 1412 # The style options for the ANALYZE statement 1413 ANALYZE_STYLES = { 1414 "BUFFER_USAGE_LIMIT", 1415 "FULL", 1416 "LOCAL", 1417 "NO_WRITE_TO_BINLOG", 1418 "SAMPLE", 1419 "SKIP_LOCKED", 1420 "VERBOSE", 1421 } 1422 1423 ANALYZE_EXPRESSION_PARSERS = { 1424 "ALL": lambda self: self._parse_analyze_columns(), 1425 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1426 "DELETE": lambda self: self._parse_analyze_delete(), 1427 "DROP": lambda self: self._parse_analyze_histogram(), 1428 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1429 "LIST": lambda self: self._parse_analyze_list(), 1430 "PREDICATE": lambda self: self._parse_analyze_columns(), 1431 "UPDATE": lambda self: self._parse_analyze_histogram(), 1432 "VALIDATE": lambda self: self._parse_analyze_validate(), 1433 } 1434 1435 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1436 1437 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1438 1439 OPERATION_MODIFIERS: t.Set[str] = set() 1440 1441 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1442 1443 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1444 1445 STRICT_CAST = True 1446 1447 PREFIXED_PIVOT_COLUMNS = False 1448 IDENTIFY_PIVOT_STRINGS = False 1449 1450 LOG_DEFAULTS_TO_LN = False 1451 1452 # Whether ADD is present for each column added by ALTER TABLE 1453 
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1454 1455 # Whether the table sample clause expects CSV syntax 1456 TABLESAMPLE_CSV = False 1457 1458 # The default method used for table sampling 1459 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1460 1461 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1462 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1463 1464 # Whether the TRIM function expects the characters to trim as its first argument 1465 TRIM_PATTERN_FIRST = False 1466 1467 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1468 STRING_ALIASES = False 1469 1470 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1471 MODIFIERS_ATTACHED_TO_SET_OP = True 1472 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1473 1474 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1475 NO_PAREN_IF_COMMANDS = True 1476 1477 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1478 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1479 1480 # Whether the `:` operator is used to extract a value from a VARIANT column 1481 COLON_IS_VARIANT_EXTRACT = False 1482 1483 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1484 # If this is True and '(' is not found, the keyword will be treated as an identifier 1485 VALUES_FOLLOWED_BY_PAREN = True 1486 1487 # Whether implicit unnesting is supported, e.g. 
def reset(self):
    """Clears all per-parse state so that the parser instance can be reused."""
    # The SQL being parsed and the errors recorded for it
    self.sql, self.errors = "", []

    # The token stream and the cursor into it
    self._tokens, self._index = [], 0

    # The tokens around the cursor and the comments carried by the previous token
    self._curr = self._next = self._prev = self._prev_comments = None
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.

    Raises:
        TypeError: If no parser is registered for one of the expression types.
        ParseError: If the tokens could not be parsed into any of the expression types.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Record which target type this failure belongs to before trying the next one
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # If no candidate type was tried (e.g. an empty collection was given) there is no
    # previous failure to chain from; indexing `errors[-1]` would raise an IndexError
    # and hide the ParseError that is meant to be reported here.
    cause = errors[-1] if errors else None

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from cause
def check_errors(self) -> None:
    """
    Reports the errors recorded so far, according to the chosen error level setting:
    each one is logged under `ErrorLevel.WARN`, whereas under `ErrorLevel.RAISE` they
    are raised together as a single `ParseError` (if there are any).
    """
    level = self.error_level

    if level == ErrorLevel.WARN:
        for recorded_error in self.errors:
            logger.error(str(recorded_error))
        return

    if level == ErrorLevel.RAISE and self.errors:
        summary = concat_messages(self.errors, self.max_errors)
        raise ParseError(summary, errors=merge_errors(self.errors))
1686 kwargs: The arguments to set for the expression along with their respective values. 1687 1688 Returns: 1689 The target expression. 1690 """ 1691 instance = exp_class(**kwargs) 1692 instance.add_comments(comments) if comments else self._add_comments(instance) 1693 return self.validate_expression(instance) 1694 1695 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1696 if expression and self._prev_comments: 1697 expression.add_comments(self._prev_comments) 1698 self._prev_comments = None 1699 1700 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1701 """ 1702 Validates an Expression, making sure that all its mandatory arguments are set. 1703 1704 Args: 1705 expression: The expression to validate. 1706 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1707 1708 Returns: 1709 The validated expression. 1710 """ 1711 if self.error_level != ErrorLevel.IGNORE: 1712 for error_message in expression.error_messages(args): 1713 self.raise_error(error_message) 1714 1715 return expression 1716 1717 def _find_sql(self, start: Token, end: Token) -> str: 1718 return self.sql[start.start : end.end + 1] 1719 1720 def _is_connected(self) -> bool: 1721 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1722 1723 def _advance(self, times: int = 1) -> None: 1724 self._index += times 1725 self._curr = seq_get(self._tokens, self._index) 1726 self._next = seq_get(self._tokens, self._index + 1) 1727 1728 if self._index > 0: 1729 self._prev = self._tokens[self._index - 1] 1730 self._prev_comments = self._prev.comments 1731 else: 1732 self._prev = None 1733 self._prev_comments = None 1734 1735 def _retreat(self, index: int) -> None: 1736 if index != self._index: 1737 self._advance(index - self._index) 1738 1739 def _warn_unsupported(self) -> None: 1740 if len(self._tokens) <= 1: 1741 return 1742 1743 # We use _find_sql because self.sql may comprise multiple chunks, 
def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
    """
    Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
    This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
    solve this by setting & resetting the parser state accordingly

    Args:
        parse_method: The zero-argument parse function to attempt.
        retreat: Whether to move back to the starting position even if the attempt succeeds.

    Returns:
        Whatever `parse_method` returned, or None if it raised a ParseError.
    """
    index = self._index
    error_level = self.error_level

    # Bind the result up front: if `parse_method` raises something other than a ParseError,
    # the `finally` clause below still runs and must be able to read `this`. Without this,
    # it would fail with an UnboundLocalError, masking the real exception and leaving
    # `self.error_level` stuck at IMMEDIATE.
    this = None

    # Errors must be raised (not just recorded) for the failed attempt to be detectable
    self.error_level = ErrorLevel.IMMEDIATE
    try:
        this = parse_method()
    except ParseError:
        # A failed attempt is an expected outcome, reported to the caller as "no result"
        this = None
    finally:
        if not this or retreat:
            self._retreat(index)
        self.error_level = error_level

    return this
# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
def _parse_ttl(self) -> exp.Expression:
    """
    Parses the body of a TTL clause: a comma-separated list of TTL expressions, each one
    optionally followed by an action (DELETE, RECOMPRESS, TO DISK, TO VOLUME), and then
    an optional WHERE and an optional GROUP BY, the latter optionally followed by SET items.
    """

    def _parse_ttl_action() -> t.Optional[exp.Expression]:
        ttl_expr = self._parse_bitwise()

        # At most one action may follow the expression; they are probed in this fixed order
        if self._match_text_seq("DELETE"):
            action = {"delete": True}
        elif self._match_text_seq("RECOMPRESS"):
            action = {"recompress": self._parse_bitwise()}
        elif self._match_text_seq("TO", "DISK"):
            action = {"to_disk": self._parse_string()}
        elif self._match_text_seq("TO", "VOLUME"):
            action = {"to_volume": self._parse_string()}
        else:
            # No action keyword: the bare expression is the result
            return ttl_expr

        return self.expression(exp.MergeTreeTTLAction, this=ttl_expr, **action)

    ttl_actions = self._parse_csv(_parse_ttl_action)
    where_clause = self._parse_where()
    group_clause = self._parse_group()

    # The SET items are only looked for when a GROUP BY was parsed
    if group_clause and self._match(TokenType.SET):
        set_items = self._parse_csv(self._parse_set_item)
    else:
        set_items = None

    return self.expression(
        exp.MergeTreeTTL,
        expressions=ttl_actions,
        where=where_clause,
        group=group_clause,
        aggregates=set_items,
    )
def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
    """
    Parses the remainder of a DROP statement into an `exp.Drop`.

    Args:
        exists: If truthy, it is used as the `exists` arg as-is and no attempt is made
            to parse an IF EXISTS clause.

    Returns:
        The `exp.Drop` expression, or the result of `_parse_as_command` if the kind of
        object being dropped is not one of `CREATABLES`.
    """
    # Remember the token preceding the DROP body so the command fallback can start from it
    start = self._prev
    temporary = self._match(TokenType.TEMPORARY)
    materialized = self._match_text_seq("MATERIALIZED")

    # `kind` is the upper-cased text of the matched creatable token, or falsy if none matched
    kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
    if not kind:
        return self._parse_as_command(start)

    concurrently = self._match_text_seq("CONCURRENTLY")
    if_exists = exists or self._parse_exists()

    if kind == "COLUMN":
        this = self._parse_column()
    else:
        # NOTE(review): `self._prev` is whichever token was consumed last. If CONCURRENTLY
        # or IF EXISTS matched above, it is presumably no longer the kind token, so this
        # check may be False for e.g. DROP SCHEMA IF EXISTS ... - confirm this is intended.
        this = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    # Peek (without consuming) for a parenthesized list of types following the name
    if self._match(TokenType.L_PAREN, advance=False):
        expressions = self._parse_wrapped_csv(self._parse_types)
    else:
        expressions = None

    # The trailing keywords are matched while the keyword arguments are being evaluated,
    # i.e. in the order they are written here: CASCADE, then CONSTRAINTS, then PURGE.
    return self.expression(
        exp.Drop,
        exists=if_exists,
        this=this,
        expressions=expressions,
        kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
        temporary=temporary,
        materialized=materialized,
        cascade=self._match_text_seq("CASCADE"),
        constraints=self._match_text_seq("CONSTRAINTS"),
        purge=self._match_text_seq("PURGE"),
        cluster=cluster,
        concurrently=concurrently,
    )
a statement parser 1923 start = self._prev 1924 1925 replace = ( 1926 start.token_type == TokenType.REPLACE 1927 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1928 or self._match_pair(TokenType.OR, TokenType.ALTER) 1929 ) 1930 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1931 1932 unique = self._match(TokenType.UNIQUE) 1933 1934 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1935 clustered = True 1936 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1937 "COLUMNSTORE" 1938 ): 1939 clustered = False 1940 else: 1941 clustered = None 1942 1943 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1944 self._advance() 1945 1946 properties = None 1947 create_token = self._match_set(self.CREATABLES) and self._prev 1948 1949 if not create_token: 1950 # exp.Properties.Location.POST_CREATE 1951 properties = self._parse_properties() 1952 create_token = self._match_set(self.CREATABLES) and self._prev 1953 1954 if not properties or not create_token: 1955 return self._parse_as_command(start) 1956 1957 concurrently = self._match_text_seq("CONCURRENTLY") 1958 exists = self._parse_exists(not_=True) 1959 this = None 1960 expression: t.Optional[exp.Expression] = None 1961 indexes = None 1962 no_schema_binding = None 1963 begin = None 1964 end = None 1965 clone = None 1966 1967 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1968 nonlocal properties 1969 if properties and temp_props: 1970 properties.expressions.extend(temp_props.expressions) 1971 elif temp_props: 1972 properties = temp_props 1973 1974 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1975 this = self._parse_user_defined_function(kind=create_token.token_type) 1976 1977 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1978 extend_props(self._parse_properties()) 1979 1980 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1981 
extend_props(self._parse_properties()) 1982 1983 if not expression: 1984 if self._match(TokenType.COMMAND): 1985 expression = self._parse_as_command(self._prev) 1986 else: 1987 begin = self._match(TokenType.BEGIN) 1988 return_ = self._match_text_seq("RETURN") 1989 1990 if self._match(TokenType.STRING, advance=False): 1991 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1992 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1993 expression = self._parse_string() 1994 extend_props(self._parse_properties()) 1995 else: 1996 expression = self._parse_user_defined_function_expression() 1997 1998 end = self._match_text_seq("END") 1999 2000 if return_: 2001 expression = self.expression(exp.Return, this=expression) 2002 elif create_token.token_type == TokenType.INDEX: 2003 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2004 if not self._match(TokenType.ON): 2005 index = self._parse_id_var() 2006 anonymous = False 2007 else: 2008 index = None 2009 anonymous = True 2010 2011 this = self._parse_index(index=index, anonymous=anonymous) 2012 elif create_token.token_type in self.DB_CREATABLES: 2013 table_parts = self._parse_table_parts( 2014 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2015 ) 2016 2017 # exp.Properties.Location.POST_NAME 2018 self._match(TokenType.COMMA) 2019 extend_props(self._parse_properties(before=True)) 2020 2021 this = self._parse_schema(this=table_parts) 2022 2023 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2024 extend_props(self._parse_properties()) 2025 2026 has_alias = self._match(TokenType.ALIAS) 2027 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2028 # exp.Properties.Location.POST_ALIAS 2029 extend_props(self._parse_properties()) 2030 2031 if create_token.token_type == TokenType.SEQUENCE: 2032 expression = self._parse_types() 2033 extend_props(self._parse_properties()) 
def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
    """Parse a run of sequence options into a single `exp.SequenceProperties` node.

    Handles INCREMENT [BY] [=], MINVALUE, MAXVALUE, START [WITH] [=], CACHE and
    OWNED BY explicitly; anything else is looked up in `self.CREATE_SEQUENCE` and
    collected under "options". A comma before each option is skipped if present.

    Returns:
        The populated node, or None when the token index did not move at all.
    """
    start_index = self._index
    props = exp.SequenceProperties()
    extra_options = []

    while self._curr:
        self._match(TokenType.COMMA)

        if self._match_text_seq("INCREMENT"):
            self._match_text_seq("BY")
            self._match_text_seq("=")
            props.set("increment", self._parse_term())
            continue

        if self._match_text_seq("MINVALUE"):
            props.set("minvalue", self._parse_term())
            continue

        if self._match_text_seq("MAXVALUE"):
            props.set("maxvalue", self._parse_term())
            continue

        if self._match(TokenType.START_WITH) or self._match_text_seq("START"):
            self._match_text_seq("=")
            props.set("start", self._parse_term())
            continue

        if self._match_text_seq("CACHE"):
            # T-SQL allows empty CACHE which is initialized dynamically
            props.set("cache", self._parse_number() or True)
            continue

        if self._match_text_seq("OWNED", "BY"):
            # "OWNED BY NONE" is the default
            owner = None if self._match_text_seq("NONE") else self._parse_column()
            props.set("owned", owner)
            continue

        option = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
        if not option:
            break
        extra_options.append(option)

    props.set("options", extra_options or None)

    if self._index == start_index:
        return None
    return props
v for k, v in kwargs.items() if v}) 2149 except TypeError: 2150 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2151 2152 return None 2153 2154 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2155 return self._parse_wrapped_csv(self._parse_property) 2156 2157 def _parse_property(self) -> t.Optional[exp.Expression]: 2158 if self._match_texts(self.PROPERTY_PARSERS): 2159 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2160 2161 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2162 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2163 2164 if self._match_text_seq("COMPOUND", "SORTKEY"): 2165 return self._parse_sortkey(compound=True) 2166 2167 if self._match_text_seq("SQL", "SECURITY"): 2168 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2169 2170 index = self._index 2171 key = self._parse_column() 2172 2173 if not self._match(TokenType.EQ): 2174 self._retreat(index) 2175 return self._parse_sequence_properties() 2176 2177 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2178 if isinstance(key, exp.Column): 2179 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2180 2181 value = self._parse_bitwise() or self._parse_var(any_token=True) 2182 2183 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2184 if isinstance(value, exp.Column): 2185 value = exp.var(value.name) 2186 2187 return self.expression(exp.Property, this=key, value=value) 2188 2189 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2190 if self._match_text_seq("BY"): 2191 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2192 2193 self._match(TokenType.ALIAS) 2194 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2195 output_format = self._parse_string() if 
self._match_text_seq("OUTPUTFORMAT") else None 2196 2197 return self.expression( 2198 exp.FileFormatProperty, 2199 this=( 2200 self.expression( 2201 exp.InputOutputFormat, 2202 input_format=input_format, 2203 output_format=output_format, 2204 ) 2205 if input_format or output_format 2206 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2207 ), 2208 ) 2209 2210 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2211 field = self._parse_field() 2212 if isinstance(field, exp.Identifier) and not field.quoted: 2213 field = exp.var(field) 2214 2215 return field 2216 2217 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2218 self._match(TokenType.EQ) 2219 self._match(TokenType.ALIAS) 2220 2221 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2222 2223 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2224 properties = [] 2225 while True: 2226 if before: 2227 prop = self._parse_property_before() 2228 else: 2229 prop = self._parse_property() 2230 if not prop: 2231 break 2232 for p in ensure_list(prop): 2233 properties.append(p) 2234 2235 if properties: 2236 return self.expression(exp.Properties, expressions=properties) 2237 2238 return None 2239 2240 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2241 return self.expression( 2242 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2243 ) 2244 2245 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2246 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2247 security_specifier = self._prev.text.upper() 2248 return self.expression(exp.SecurityProperty, this=security_specifier) 2249 return None 2250 2251 def _parse_settings_property(self) -> exp.SettingsProperty: 2252 return self.expression( 2253 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2254 ) 2255 2256 def _parse_volatile_property(self) -> 
def _parse_system_versioning_property(
    self, with_: bool = False
) -> exp.WithSystemVersioningProperty:
    """Parse `[=] {OFF | [ON] [(<option> [, ...])]}` for SYSTEM_VERSIONING.

    Recognised options are HISTORY_TABLE, DATA_CONSISTENCY_CHECK and
    HISTORY_RETENTION_PERIOD, each followed by `=`.

    Args:
        with_: Stored on the node under the "with" argument.

    Returns:
        The property node. If the option list contains a token that is not a known
        option, parsing of the list stops there and the node built so far is returned
        with the remaining tokens left unconsumed.
    """
    self._match(TokenType.EQ)
    prop = self.expression(
        exp.WithSystemVersioningProperty,
        **{  # type: ignore
            "on": True,
            "with": with_,
        },
    )

    if self._match_text_seq("OFF"):
        prop.set("on", False)
        return prop

    self._match(TokenType.ON)
    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            iteration_start = self._index

            if self._match_text_seq("HISTORY_TABLE", "="):
                prop.set("this", self._parse_table_parts())
            elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
            elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            # Nothing was consumed: the current token is neither a known option, a comma
            # nor ")". Without this guard the loop would spin on that token forever.
            if self._index == iteration_start:
                break

    return prop
def _parse_distributed_property(self) -> exp.DistributedByProperty:
    """Parse `[BY HASH (<ids>) | BY RANDOM] [BUCKETS {AUTO | <n>}] [<order>]`.

    The kind is "HASH" unless BY RANDOM is matched.
    """
    distribution = "HASH"
    hash_columns: t.Optional[t.List[exp.Expression]] = None

    if self._match_text_seq("BY", "HASH"):
        hash_columns = self._parse_wrapped_csv(self._parse_id_var)
    elif self._match_text_seq("BY", "RANDOM"):
        distribution = "RANDOM"

    # If the BUCKETS keyword is not present, the number of buckets is AUTO
    bucket_count: t.Optional[exp.Expression] = None
    if self._match_text_seq("BUCKETS"):
        if not self._match_text_seq("AUTO"):
            bucket_count = self._parse_number()

    order = self._parse_order()

    return self.expression(
        exp.DistributedByProperty,
        expressions=hash_columns,
        kind=distribution,
        buckets=bucket_count,
        order=order,
    )
def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
    """Parse `[=] <user> [@] <host>` into an `exp.DefinerProperty`.

    See https://dev.mysql.com/doc/refman/8.0/en/create-view.html

    The host is either an identifier or a MOD token, in which case the token's text
    is used.

    Returns:
        The property holding "<user>@<host>", or None when either part is missing.
    """
    self._match(TokenType.EQ)

    user = self._parse_id_var()
    self._match(TokenType.PARAMETER)

    host = self._parse_id_var()
    if not host:
        host = self._match(TokenType.MOD) and self._prev.text

    if user and host:
        return exp.DefinerProperty(this=f"{user}@{host}")

    return None
_parse_checksum(self) -> exp.ChecksumProperty: 2420 self._match(TokenType.EQ) 2421 2422 on = None 2423 if self._match(TokenType.ON): 2424 on = True 2425 elif self._match_text_seq("OFF"): 2426 on = False 2427 2428 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2429 2430 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2431 return self.expression( 2432 exp.Cluster, 2433 expressions=( 2434 self._parse_wrapped_csv(self._parse_ordered) 2435 if wrapped 2436 else self._parse_csv(self._parse_ordered) 2437 ), 2438 ) 2439 2440 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2441 self._match_text_seq("BY") 2442 2443 self._match_l_paren() 2444 expressions = self._parse_csv(self._parse_column) 2445 self._match_r_paren() 2446 2447 if self._match_text_seq("SORTED", "BY"): 2448 self._match_l_paren() 2449 sorted_by = self._parse_csv(self._parse_ordered) 2450 self._match_r_paren() 2451 else: 2452 sorted_by = None 2453 2454 self._match(TokenType.INTO) 2455 buckets = self._parse_number() 2456 self._match_text_seq("BUCKETS") 2457 2458 return self.expression( 2459 exp.ClusteredByProperty, 2460 expressions=expressions, 2461 sorted_by=sorted_by, 2462 buckets=buckets, 2463 ) 2464 2465 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2466 if not self._match_text_seq("GRANTS"): 2467 self._retreat(self._index - 1) 2468 return None 2469 2470 return self.expression(exp.CopyGrantsProperty) 2471 2472 def _parse_freespace(self) -> exp.FreespaceProperty: 2473 self._match(TokenType.EQ) 2474 return self.expression( 2475 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2476 ) 2477 2478 def _parse_mergeblockratio( 2479 self, no: bool = False, default: bool = False 2480 ) -> exp.MergeBlockRatioProperty: 2481 if self._match(TokenType.EQ): 2482 return self.expression( 2483 exp.MergeBlockRatioProperty, 2484 this=self._parse_number(), 2485 percent=self._match(TokenType.PERCENT), 
2486 ) 2487 2488 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2489 2490 def _parse_datablocksize( 2491 self, 2492 default: t.Optional[bool] = None, 2493 minimum: t.Optional[bool] = None, 2494 maximum: t.Optional[bool] = None, 2495 ) -> exp.DataBlocksizeProperty: 2496 self._match(TokenType.EQ) 2497 size = self._parse_number() 2498 2499 units = None 2500 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2501 units = self._prev.text 2502 2503 return self.expression( 2504 exp.DataBlocksizeProperty, 2505 size=size, 2506 units=units, 2507 default=default, 2508 minimum=minimum, 2509 maximum=maximum, 2510 ) 2511 2512 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2513 self._match(TokenType.EQ) 2514 always = self._match_text_seq("ALWAYS") 2515 manual = self._match_text_seq("MANUAL") 2516 never = self._match_text_seq("NEVER") 2517 default = self._match_text_seq("DEFAULT") 2518 2519 autotemp = None 2520 if self._match_text_seq("AUTOTEMP"): 2521 autotemp = self._parse_schema() 2522 2523 return self.expression( 2524 exp.BlockCompressionProperty, 2525 always=always, 2526 manual=manual, 2527 never=never, 2528 default=default, 2529 autotemp=autotemp, 2530 ) 2531 2532 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2533 index = self._index 2534 no = self._match_text_seq("NO") 2535 concurrent = self._match_text_seq("CONCURRENT") 2536 2537 if not self._match_text_seq("ISOLATED", "LOADING"): 2538 self._retreat(index) 2539 return None 2540 2541 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2542 return self.expression( 2543 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2544 ) 2545 2546 def _parse_locking(self) -> exp.LockingProperty: 2547 if self._match(TokenType.TABLE): 2548 kind = "TABLE" 2549 elif self._match(TokenType.VIEW): 2550 kind = "VIEW" 2551 elif self._match(TokenType.ROW): 2552 kind = "ROW" 2553 elif 
def _parse_partition_by(self) -> t.List[exp.Expression]:
    """Parse an optional PARTITION BY clause.

    Returns:
        The comma-separated expressions following a PARTITION_BY token, or an empty
        list when that token is not next.
    """
    if not self._match(TokenType.PARTITION_BY):
        return []

    return self._parse_csv(self._parse_assignment)
self._parse_wrapped_csv(_parse_partition_bound_expr) 2620 elif self._match_text_seq("WITH", "(", "MODULUS"): 2621 this = self._parse_number() 2622 self._match_text_seq(",", "REMAINDER") 2623 expression = self._parse_number() 2624 self._match_r_paren() 2625 else: 2626 self.raise_error("Failed to parse partition bound spec.") 2627 2628 return self.expression( 2629 exp.PartitionBoundSpec, 2630 this=this, 2631 expression=expression, 2632 from_expressions=from_expressions, 2633 to_expressions=to_expressions, 2634 ) 2635 2636 # https://www.postgresql.org/docs/current/sql-createtable.html 2637 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2638 if not self._match_text_seq("OF"): 2639 self._retreat(self._index - 1) 2640 return None 2641 2642 this = self._parse_table(schema=True) 2643 2644 if self._match(TokenType.DEFAULT): 2645 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2646 elif self._match_text_seq("FOR", "VALUES"): 2647 expression = self._parse_partition_bound_spec() 2648 else: 2649 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2650 2651 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2652 2653 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2654 self._match(TokenType.EQ) 2655 return self.expression( 2656 exp.PartitionedByProperty, 2657 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2658 ) 2659 2660 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2661 if self._match_text_seq("AND", "STATISTICS"): 2662 statistics = True 2663 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2664 statistics = False 2665 else: 2666 statistics = None 2667 2668 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2669 2670 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2671 if self._match_text_seq("SQL"): 2672 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS 
SQL") 2673 return None 2674 2675 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2676 if self._match_text_seq("SQL", "DATA"): 2677 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2678 return None 2679 2680 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2681 if self._match_text_seq("PRIMARY", "INDEX"): 2682 return exp.NoPrimaryIndexProperty() 2683 if self._match_text_seq("SQL"): 2684 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2685 return None 2686 2687 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2688 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2689 return exp.OnCommitProperty() 2690 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2691 return exp.OnCommitProperty(delete=True) 2692 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2693 2694 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2695 if self._match_text_seq("SQL", "DATA"): 2696 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2697 return None 2698 2699 def _parse_distkey(self) -> exp.DistKeyProperty: 2700 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2701 2702 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2703 table = self._parse_table(schema=True) 2704 2705 options = [] 2706 while self._match_texts(("INCLUDING", "EXCLUDING")): 2707 this = self._prev.text.upper() 2708 2709 id_var = self._parse_id_var() 2710 if not id_var: 2711 return None 2712 2713 options.append( 2714 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2715 ) 2716 2717 return self.expression(exp.LikeProperty, this=table, expressions=options) 2718 2719 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2720 return self.expression( 2721 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2722 ) 2723 2724 def 
def _parse_returns(self) -> exp.ReturnsProperty:
    """Parse the operand of a RETURNS clause.

    Three shapes are handled:
      - `TABLE <...>` or `TABLE (...)`: the value is a schema named TABLE;
      - `NULL ON NULL INPUT`: no value, with the "null" flag set;
      - anything else: a data type.
    """
    returned: t.Optional[exp.Expression]
    null_on_null = None
    is_table = self._match(TokenType.TABLE)

    if is_table and self._match(TokenType.LT):
        columns = self._parse_csv(self._parse_struct_types)
        returned = self.expression(exp.Schema, this="TABLE", expressions=columns)
        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")
    elif is_table:
        returned = self._parse_schema(exp.var("TABLE"))
    elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
        null_on_null = True
        returned = None
    else:
        returned = self._parse_types()

    return self.expression(
        exp.ReturnsProperty, this=returned, is_table=is_table, null=null_on_null
    )
def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
    """Parse the branches and source of a multi-table INSERT.

    The upper-cased text of the previously consumed token becomes the node's kind.

    Args:
        comments: Comments to attach to the resulting node.

    Returns:
        An `exp.MultitableInserts` holding every `[WHEN <cond> THEN] [ELSE] INTO ...`
        branch found, followed by the source table.
    """
    kind = self._prev.text.upper()

    def parse_branch() -> t.Optional[exp.ConditionalInsert]:
        # One "[WHEN <cond> THEN] [ELSE] INTO <table> [<values>]" branch, or None
        condition = None
        if self._match(TokenType.WHEN):
            condition = self._parse_disjunction()
            self._match(TokenType.THEN)

        is_else = self._match(TokenType.ELSE)

        if not self._match(TokenType.INTO):
            return None

        target = self._parse_table(schema=True)
        values = self._parse_derived_table_values()
        insert = self.expression(exp.Insert, this=target, expression=values)

        return self.expression(
            exp.ConditionalInsert, this=insert, expression=condition, else_=is_else
        )

    branches = []
    while True:
        branch = parse_branch()
        if branch is None:
            break
        branches.append(branch)

    return self.expression(
        exp.MultitableInserts,
        kind=kind,
        comments=comments,
        expressions=branches,
        source=self._parse_table(),
    )
def _parse_kill(self) -> exp.Kill:
    """Parse `[CONNECTION | QUERY] <primary>` into an `exp.Kill` node.

    The optional keyword is kept as written (not upper-cased) in the node's kind.
    """
    kind = None
    if self._match_texts(("CONNECTION", "QUERY")):
        kind = exp.var(self._prev.text)

    target = self._parse_primary()
    return self.expression(exp.Kill, this=target, kind=kind)
def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
    """Parse an optional `[WITH] SERDEPROPERTIES (...)` clause.

    Args:
        with_: Whether the caller already consumed the WITH keyword. When falsy,
            an optional WITH is matched here.

    Returns:
        The parsed `exp.SerdeProperties`, or None (with the token position
        restored) when the SERDE_PROPERTIES token does not follow.
    """
    start = self._index
    has_with = with_ or self._match_text_seq("WITH")

    if self._match(TokenType.SERDE_PROPERTIES):
        # "with" is a reserved word, hence the dict instead of keyword arguments
        node_args = {
            "expressions": self._parse_wrapped_properties(),
            "with": has_with,
        }
        return self.expression(exp.SerdeProperties, **node_args)  # type: ignore

    # Undo a WITH that may have been consumed above
    self._retreat(start)
    return None
def _parse_load(self) -> exp.LoadData | exp.Command:
    """Parse a LOAD statement.

    `LOAD DATA ...` is turned into an `exp.LoadData`; anything else is handed to
    `_parse_as_command` together with the previously consumed token.
    """
    if not self._match_text_seq("DATA"):
        return self._parse_as_command(self._prev)

    is_local = self._match_text_seq("LOCAL")
    self._match_text_seq("INPATH")
    source_path = self._parse_string()
    replace_existing = self._match(TokenType.OVERWRITE)
    self._match_pair(TokenType.INTO, TokenType.TABLE)

    # The remaining clauses are read strictly in this order
    target = self._parse_table(schema=True)
    partition = self._parse_partition()
    input_format = self._match_text_seq("INPUTFORMAT") and self._parse_string()
    serde = self._match_text_seq("SERDE") and self._parse_string()

    return self.expression(
        exp.LoadData,
        this=target,
        local=is_local,
        overwrite=replace_existing,
        inpath=source_path,
        partition=partition,
        input_format=input_format,
        serde=serde,
    )
def _parse_update(self) -> exp.Update:
    """Parse the body of an UPDATE statement into an `exp.Update`.

    RETURNING is attempted twice: once right after the SET assignments and,
    if that found nothing, once more after the WHERE clause.
    """
    target = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
    assignments = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
    early_returning = self._parse_returning()

    # "from" is a reserved word, so the arguments are collected in a dict.
    # Each entry consumes tokens, so the insertion order below is significant.
    update_args: t.Dict[str, t.Any] = {"this": target, "expressions": assignments}
    update_args["from"] = self._parse_from(joins=True)
    update_args["where"] = self._parse_where()
    update_args["returning"] = early_returning or self._parse_returning()
    update_args["order"] = self._parse_order()
    update_args["limit"] = self._parse_limit()

    return self.expression(exp.Update, **update_args)  # type: ignore
def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
    """Parse a single row of values into an `exp.Tuple`.

    Args:
        values: Not read by this implementation; kept as part of the signature.

    Returns:
        A tuple built from a parenthesized, comma-separated list, or a
        one-element tuple for a bare expression, or None if nothing was parsed.
    """

    def _row_item() -> t.Optional[exp.Expression]:
        # DEFAULT is only recognized as a row item when the dialect allows it
        default_allowed = self.dialect.SUPPORTS_VALUES_DEFAULT
        if default_allowed and self._match(TokenType.DEFAULT):
            return exp.var(self._prev.text.upper())
        return self._parse_expression()

    if not self._match(TokenType.L_PAREN):
        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        bare = self._parse_expression()
        if not bare:
            return None
        return self.expression(exp.Tuple, expressions=[bare])

    items = self._parse_csv(_row_item)
    self._match_r_paren()
    return self.expression(exp.Tuple, expressions=items)
def _parse_select(
    self,
    nested: bool = False,
    table: bool = False,
    parse_subquery_alias: bool = True,
    parse_set_operation: bool = True,
) -> t.Optional[exp.Expression]:
    """Parse a query-like construct starting at the current token.

    Args:
        nested: Allows a parenthesized query to be parsed as a subquery.
        table: Also allows the parenthesized form, and is forwarded to
            `_parse_wrapped_select`.
        parse_subquery_alias: Forwarded to `_parse_subquery` as `parse_alias`.
        parse_set_operation: When False, the result is returned without being
            passed through `_parse_set_operations`.

    Returns:
        The parsed expression, or None when no branch below matches.
    """
    cte = self._parse_with()

    if cte:
        # A WITH clause was parsed: the statement after it receives the CTEs
        this = self._parse_statement()

        if not this:
            self.raise_error("Failed to parse any statement following CTE")
            return cte

        # Only expression types that declare a "with" arg can carry the CTEs
        if "with" in this.arg_types:
            this.set("with", cte)
        else:
            self.raise_error(f"{this.key} does not support CTE")
            this = cte

        return this

    # duckdb supports leading with FROM x
    from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

    if self._match(TokenType.SELECT):
        # Captured now and assigned to the Select node once it is built
        comments = self._prev_comments

        hint = self._parse_hint()

        # ALL / DISTINCT are not matched when the token after the current one is a dot
        if self._next and not self._next.token_type == TokenType.DOT:
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)
        else:
            all_, distinct = None, None

        # Optional "AS STRUCT" / "AS VALUE" right after SELECT
        kind = (
            self._match(TokenType.ALIAS)
            and self._match_texts(("STRUCT", "VALUE"))
            and self._prev.text.upper()
        )

        if distinct:
            distinct = self.expression(
                exp.Distinct,
                on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
            )

        if all_ and distinct:
            self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

        # Collect every consecutive keyword listed in OPERATION_MODIFIERS
        operation_modifiers = []
        while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
            operation_modifiers.append(exp.var(self._prev.text.upper()))

        # A limit that appears before the projections
        limit = self._parse_limit(top=True)
        projections = self._parse_projections()

        this = self.expression(
            exp.Select,
            kind=kind,
            hint=hint,
            distinct=distinct,
            expressions=projections,
            limit=limit,
            operation_modifiers=operation_modifiers or None,
        )
        this.comments = comments

        into = self._parse_into()
        if into:
            this.set("into", into)

        # A leading FROM parsed above takes precedence over a trailing one
        if not from_:
            from_ = self._parse_from()

        if from_:
            this.set("from", from_)

        this = self._parse_query_modifiers(this)
    elif (table or nested) and self._match(TokenType.L_PAREN):
        this = self._parse_wrapped_select(table=table)

        # We return early here so that the UNION isn't attached to the subquery by the
        # following call to _parse_set_operations, but instead becomes the parent node
        self._match_r_paren()
        return self._parse_subquery(this, parse_alias=parse_subquery_alias)
    elif self._match(TokenType.VALUES, advance=False):
        this = self._parse_derived_table_values()
    elif from_:
        # A leading FROM with no SELECT is treated as SELECT * FROM ...
        this = exp.select("*").from_(from_.this, copy=False)
    elif self._match(TokenType.SUMMARIZE):
        # Note: rebinds the `table` parameter to the result of matching TABLE
        table = self._match(TokenType.TABLE)
        this = self._parse_select() or self._parse_string() or self._parse_table()
        return self.expression(exp.Summarize, this=this, table=table)
    elif self._match(TokenType.DESCRIBE):
        this = self._parse_describe()
    elif self._match_text_seq("STREAM"):
        this = self._parse_function()
        if this:
            this = self.expression(exp.Stream, this=this)
        else:
            # No function followed, so step back one token to un-consume STREAM
            self._retreat(self._index - 1)
    else:
        this = None

    return self._parse_set_operations(this) if parse_set_operation else this
def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
    """Parse a WITH clause and its list of CTEs.

    Args:
        skip_with_token: When True, the WITH keyword is assumed to have been
            consumed already and is not matched here.

    Returns:
        An `exp.With`, or None if the WITH keyword was required but is absent.
    """
    if not (skip_with_token or self._match(TokenType.WITH)):
        return None

    with_comments = self._prev_comments
    is_recursive = self._match(TokenType.RECURSIVE)

    ctes = []
    pending_comments = None
    while True:
        candidate = self._parse_cte()
        if isinstance(candidate, exp.CTE):
            ctes.append(candidate)
            if pending_comments:
                candidate.add_comments(pending_comments)

        # CTEs are separated by a comma, a repeated WITH, or a comma followed by WITH
        has_separator = self._match(TokenType.COMMA) or self._match(TokenType.WITH)
        if not has_separator:
            break
        self._match(TokenType.WITH)

        # Comments seen around the separator are attached to the next CTE
        pending_comments = self._prev_comments

    search = self._parse_recursive_with_search()
    return self.expression(
        exp.With,
        comments=with_comments,
        expressions=ctes,
        recursive=is_recursive,
        search=search,
    )
def _parse_table_alias(
    self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
) -> t.Optional[exp.TableAlias]:
    """Parse an optional table alias, optionally followed by a column list.

    Args:
        alias_tokens: Token types accepted as the alias name; falls back to
            `TABLE_ALIAS_TOKENS` when not given.

    Returns:
        An `exp.TableAlias`, or None when neither a name nor columns were found.
    """
    # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
    # so this section tries to parse the clause version and if it fails, it treats the token
    # as an identifier (alias)
    if self._can_parse_limit_or_offset():
        return None

    explicit_as = self._match(TokenType.ALIAS)
    allowed = alias_tokens or self.TABLE_ALIAS_TOKENS
    name = self._parse_id_var(any_token=explicit_as, tokens=allowed)
    if not name:
        name = self._parse_string_as_identifier()

    checkpoint = self._index
    column_names = None
    if self._match(TokenType.L_PAREN):
        column_names = self._parse_csv(self._parse_function_parameter)
        if column_names:
            self._match_r_paren()
        else:
            # The parenthesis did not open a column list, so give it back
            self._retreat(checkpoint)

    if not (name or column_names):
        return None

    result = self.expression(exp.TableAlias, this=name, columns=column_names)

    # We bubble up comments from the Identifier to the TableAlias
    if isinstance(name, exp.Identifier):
        result.add_comments(name.pop_comments())

    return result
def _parse_query_modifiers(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Attach joins, laterals and trailing query modifiers to `this`.

    Modifiers are only parsed when `this` is an instance of `MODIFIABLES`.
    Independently of that, implicit unnests are made explicit when the parser
    supports them and `this` has a "from" arg.
    """
    if isinstance(this, self.MODIFIABLES):
        for join in self._parse_joins():
            this.append("joins", join)
        for lateral in iter(self._parse_lateral, None):
            this.append("laterals", lateral)

        # Keep going while the current token starts a known modifier
        while self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
            modifier_parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
            arg_name, node = modifier_parser(self)

            # A modifier parser that yields nothing ends the scan
            if not node:
                break

            this.set(arg_name, node)
            if arg_name != "limit":
                continue

            # An offset carried inside the limit is hoisted into its own node
            inline_offset = node.args.pop("offset", None)
            if not inline_offset:
                continue

            offset_node = exp.Offset(expression=inline_offset)
            this.set("offset", offset_node)

            # The limit's expressions move over to the new offset node
            moved_expressions = node.expressions
            node.set("expressions", None)
            offset_node.set("expressions", moved_expressions)

    if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
        this = self._implicit_unnests_to_explicit(this)

    return this
def _parse_hint_body(self) -> t.Optional[exp.Hint]:
    """Parse the contents of a hint into structured expressions.

    Groups of comma-separated function calls / upper-cased variables are read
    until an empty group comes back. If parsing raises a `ParseError`, or if
    tokens are left over afterwards, the position is rewound and the whole hint
    is parsed by `_parse_hint_fallback_to_string` instead.
    """
    checkpoint = self._index

    def _hint_item() -> t.Optional[exp.Expression]:
        return self._parse_hint_function_call() or self._parse_var(upper=True)

    collected = []
    parse_failed = False
    try:
        while True:
            group = self._parse_csv(_hint_item)
            if group == []:
                break
            collected.extend(group)
    except ParseError:
        parse_failed = True

    if parse_failed or self._curr:
        self._retreat(checkpoint)
        return self._parse_hint_fallback_to_string()

    return self.expression(exp.Hint, expressions=collected)
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a MATCH_RECOGNIZE (...) clause.

    Returns:
        An `exp.MatchRecognize`, or None when the MATCH_RECOGNIZE token is absent.
        The rows-per-match option, the AFTER MATCH SKIP option and the pattern
        are stored as variables holding their SQL text.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = None
    if self._match_text_seq("MEASURES"):
        measures = self._parse_csv(self._parse_match_recognize_measure)

    rows = None
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        rows_text = "ALL ROWS PER MATCH"
        # At most one of these suffixes may follow, tried in this order
        for words in (
            ("SHOW", "EMPTY", "MATCHES"),
            ("OMIT", "EMPTY", "MATCHES"),
            ("WITH", "UNMATCHED", "ROWS"),
        ):
            if self._match_text_seq(*words):
                rows_text += " " + " ".join(words)
                break
        rows = exp.var(rows_text)

    after = None
    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        after_text = "AFTER MATCH SKIP"
        # The flag marks the forms that are followed by one more token
        for words, takes_symbol in (
            (("PAST", "LAST", "ROW"), False),
            (("TO", "NEXT", "ROW"), False),
            (("TO", "FIRST"), True),
            (("TO", "LAST"), True),
        ):
            if self._match_text_seq(*words):
                after_text += " " + " ".join(words)
                if takes_symbol:
                    after_text += f" {self._advance_any().text}"  # type: ignore
                break
        after = exp.var(after_text)

    pattern = None
    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern is kept as raw SQL: walk tokens up to the balancing ")"
        depth = 1
        first_token = self._curr

        while self._curr and depth > 0:
            current_type = self._curr.token_type
            if current_type == TokenType.L_PAREN:
                depth += 1
            elif current_type == TokenType.R_PAREN:
                depth -= 1

            last_token = self._prev
            self._advance()

        if depth > 0:
            self.raise_error("Expecting )", self._curr)

        pattern = exp.var(self._find_sql(first_token, last_token))

    define = None
    if self._match_text_seq("DEFINE"):
        define = self._parse_csv(self._parse_name_as_expression)

    self._match_r_paren()

    alias = self._parse_table_alias()
    return self.expression(
        exp.MatchRecognize,
        partition_by=partition,
        order=order,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=alias,
    )
def _parse_using_identifiers(self) -> t.List[exp.Expression]:
    """Parse the optionally parenthesized, comma-separated list after USING.

    Each item is parsed as a column; an `exp.Column` result is replaced by its
    `this` arg, any other result is kept as parsed.
    """

    def _column_or_inner() -> t.Optional[exp.Expression]:
        parsed = self._parse_column()
        return parsed.this if isinstance(parsed, exp.Column) else parsed

    return self._parse_wrapped_csv(_column_or_inner, optional=True)
def _parse_opclass(self) -> t.Optional[exp.Expression]:
    """Parse an expression that may be followed by an operator class name.

    The expression is returned unchanged when the next token is one of
    `OPCLASS_FOLLOW_KEYWORDS` or `OPTYPE_FOLLOW_TOKENS`; otherwise the following
    table parts are parsed and both are wrapped in an `exp.Opclass`.
    """
    operand = self._parse_assignment()

    # Neither check consumes a token; the second only runs if the first fails
    followed_by_keyword = self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False)
    if followed_by_keyword or self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
        return operand

    opclass_name = self._parse_table_parts()
    return self.expression(exp.Opclass, this=operand, expression=opclass_name)
def _parse_index(
    self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
) -> t.Optional[exp.Index]:
    """Parse an index definition.

    Args:
        index: An already parsed index name, if any.
        anonymous: Whether the index has no name.

    Returns:
        An `exp.Index`. When neither `index` nor `anonymous` is given and the
        INDEX token is missing, None is returned instead.
    """
    unique = primary = amp = table = None

    if index or anonymous:
        # The name is known (or absent), so only "[ON] [TABLE] <table>" remains
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive
        table = self._parse_table_parts(schema=True)
    else:
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")

        if not self._match(TokenType.INDEX):
            return None

        index = self._parse_id_var()

    params = self._parse_index_params()

    return self.expression(
        exp.Index,
        this=index,
        table=table,
        unique=unique,
        primary=primary,
        amp=amp,
        params=params,
    )
expressions=self._parse_csv( 3833 lambda: self._parse_function() or self._parse_var(any_token=True) 3834 ), 3835 ) 3836 ) 3837 self._match_r_paren() 3838 else: 3839 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3840 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3841 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3842 3843 self._match_set((TokenType.INDEX, TokenType.KEY)) 3844 if self._match(TokenType.FOR): 3845 hint.set("target", self._advance_any() and self._prev.text.upper()) 3846 3847 hint.set("expressions", self._parse_wrapped_id_vars()) 3848 hints.append(hint) 3849 3850 return hints or None 3851 3852 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3853 return ( 3854 (not schema and self._parse_function(optional_parens=False)) 3855 or self._parse_id_var(any_token=False) 3856 or self._parse_string_as_identifier() 3857 or self._parse_placeholder() 3858 ) 3859 3860 def _parse_table_parts( 3861 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3862 ) -> exp.Table: 3863 catalog = None 3864 db = None 3865 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3866 3867 while self._match(TokenType.DOT): 3868 if catalog: 3869 # This allows nesting the table in arbitrarily many dot expressions if needed 3870 table = self.expression( 3871 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3872 ) 3873 else: 3874 catalog = db 3875 db = table 3876 # "" used for tsql FROM a..b case 3877 table = self._parse_table_part(schema=schema) or "" 3878 3879 if ( 3880 wildcard 3881 and self._is_connected() 3882 and (isinstance(table, exp.Identifier) or not table) 3883 and self._match(TokenType.STAR) 3884 ): 3885 if isinstance(table, exp.Identifier): 3886 table.args["this"] += "*" 3887 else: 3888 table = exp.Identifier(this="*") 3889 3890 # We bubble up comments from the Identifier to the Table 3891 comments = table.pop_comments() if isinstance(table, 
def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    parse_bracket: bool = False,
    is_db_reference: bool = False,
    parse_partition: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a table-like source and the modifiers that may follow it.

    The alternatives are tried in order and the first one that parses is
    returned immediately: a lateral, an UNNEST, a derived VALUES table, or a
    subquery. Otherwise a table is built from a bracket expression, a
    ``ROWS FROM (...)`` list, or dotted table parts, and then decorated with
    whichever of ONLY, partition, version, sample, alias, hints, pivots,
    joins and WITH ORDINALITY are present.

    Args:
        schema: Forwarded to `_parse_table_parts`; when true the result is
            handed to `_parse_schema` and returned before alias/hint parsing.
        joins: When true, trailing joins are parsed and appended to the table.
        alias_tokens: Token types accepted as a table alias; defaults to
            `self.TABLE_ALIAS_TOKENS`.
        parse_bracket: When true, a leading bracket expression is tried first.
        is_db_reference: Forwarded to `_parse_table_parts`.
        parse_partition: Enables PARTITION parsing; it is also enabled when
            `self.SUPPORTS_PARTITION_SELECTION` is truthy.

    Returns:
        The parsed expression for the table source.
    """
    lateral = self._parse_lateral()
    if lateral:
        return lateral

    unnest = self._parse_unnest()
    if unnest:
        return unnest

    values = self._parse_derived_table_values()
    if values:
        return values

    subquery = self._parse_select(table=True)
    if subquery:
        # Only parse trailing pivots if the subquery did not already get some
        if not subquery.args.get("pivots"):
            subquery.set("pivots", self._parse_pivots())
        return subquery

    # Each candidate is wrapped in an exp.Table only if it actually parsed
    bracket = parse_bracket and self._parse_bracket(None)
    bracket = self.expression(exp.Table, this=bracket) if bracket else None

    # ROWS FROM (<table>, ...) recurses into this method for each entry
    rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
        self._parse_table
    )
    rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

    # ONLY must be consumed before the table name itself is parsed
    only = self._match(TokenType.ONLY)

    # The dotted-name fallback is only attempted when neither of the above matched
    this = t.cast(
        exp.Expression,
        bracket
        or rows_from
        or self._parse_bracket(
            self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
        ),
    )

    if only:
        this.set("only", only)

    # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
    self._match_text_seq("*")

    parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
    # Peek only (advance=False); `_parse_partition` is left to consume the keyword
    if parse_partition and self._match(TokenType.PARTITION, advance=False):
        this.set("partition", self._parse_partition())

    if schema:
        return self._parse_schema(this=this)

    version = self._parse_version()

    if version:
        this.set("version", version)

    # The sample is parsed before the alias here, or after it further below,
    # depending on the dialect's ALIAS_POST_TABLESAMPLE flag
    if self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
    if alias:
        this.set("alias", alias)

    # `<table> AT <id>`: the table is reinterpreted as a column and wrapped in AtIndex
    if isinstance(this, exp.Table) and self._match_text_seq("AT"):
        return self.expression(
            exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
        )

    this.set("hints", self._parse_table_hints())

    if not this.args.get("pivots"):
        this.set("pivots", self._parse_pivots())

    if not self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    if joins:
        for join in self._parse_joins():
            this.append("joins", join)

    # WITH ORDINALITY replaces any alias set above with the one parsed after it
    if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
        this.set("ordinality", True)
        this.set("alias", self._parse_table_alias())

    return this
4031 end = self._parse_bitwise() 4032 expression: t.Optional[exp.Expression] = self.expression( 4033 exp.Tuple, expressions=[start, end] 4034 ) 4035 elif self._match_text_seq("CONTAINED", "IN"): 4036 kind = "CONTAINED IN" 4037 expression = self.expression( 4038 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4039 ) 4040 elif self._match(TokenType.ALL): 4041 kind = "ALL" 4042 expression = None 4043 else: 4044 self._match_text_seq("AS", "OF") 4045 kind = "AS OF" 4046 expression = self._parse_type() 4047 4048 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4049 4050 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4051 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4052 index = self._index 4053 historical_data = None 4054 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4055 this = self._prev.text.upper() 4056 kind = ( 4057 self._match(TokenType.L_PAREN) 4058 and self._match_texts(self.HISTORICAL_DATA_KIND) 4059 and self._prev.text.upper() 4060 ) 4061 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4062 4063 if expression: 4064 self._match_r_paren() 4065 historical_data = self.expression( 4066 exp.HistoricalData, this=this, kind=kind, expression=expression 4067 ) 4068 else: 4069 self._retreat(index) 4070 4071 return historical_data 4072 4073 def _parse_changes(self) -> t.Optional[exp.Changes]: 4074 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4075 return None 4076 4077 information = self._parse_var(any_token=True) 4078 self._match_r_paren() 4079 4080 return self.expression( 4081 exp.Changes, 4082 information=information, 4083 at_before=self._parse_historical_data(), 4084 end=self._parse_historical_data(), 4085 ) 4086 4087 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4088 if not self._match(TokenType.UNNEST): 4089 return None 4090 4091 expressions = self._parse_wrapped_csv(self._parse_equality) 4092 
def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
    """Parse a VALUES list used as a table source.

    Three spellings are accepted: ``(VALUES ...)``, a bare ``VALUES ...``,
    and ``FORMAT VALUES ...``. An alias may appear either before or after
    the closing parenthesis of the wrapped form.

    Returns:
        An `exp.Values` node, or None if none of the spellings matched.
    """
    wrapped = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)

    # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
    if (
        not wrapped
        and not self._match_text_seq("VALUES")
        and not self._match_text_seq("FORMAT", "VALUES")
    ):
        return None

    rows = self._parse_csv(self._parse_value)
    table_alias = self._parse_table_alias()

    if wrapped:
        self._match_r_paren()

    if not table_alias:
        # Second chance: the alias may follow the closing parenthesis.
        table_alias = self._parse_table_alias()

    return self.expression(exp.Values, expressions=rows, alias=table_alias)
self.TABLESAMPLE_CSV: 4151 num = None 4152 expressions = self._parse_csv(self._parse_primary) 4153 else: 4154 expressions = None 4155 num = ( 4156 self._parse_factor() 4157 if self._match(TokenType.NUMBER, advance=False) 4158 else self._parse_primary() or self._parse_placeholder() 4159 ) 4160 4161 if self._match_text_seq("BUCKET"): 4162 bucket_numerator = self._parse_number() 4163 self._match_text_seq("OUT", "OF") 4164 bucket_denominator = bucket_denominator = self._parse_number() 4165 self._match(TokenType.ON) 4166 bucket_field = self._parse_field() 4167 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4168 percent = num 4169 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4170 size = num 4171 else: 4172 percent = num 4173 4174 if matched_l_paren: 4175 self._match_r_paren() 4176 4177 if self._match(TokenType.L_PAREN): 4178 method = self._parse_var(upper=True) 4179 seed = self._match(TokenType.COMMA) and self._parse_number() 4180 self._match_r_paren() 4181 elif self._match_texts(("SEED", "REPEATABLE")): 4182 seed = self._parse_wrapped(self._parse_number) 4183 4184 if not method and self.DEFAULT_SAMPLING_METHOD: 4185 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4186 4187 return self.expression( 4188 exp.TableSample, 4189 expressions=expressions, 4190 method=method, 4191 bucket_numerator=bucket_numerator, 4192 bucket_denominator=bucket_denominator, 4193 bucket_field=bucket_field, 4194 percent=percent, 4195 size=size, 4196 seed=seed, 4197 ) 4198 4199 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4200 return list(iter(self._parse_pivot, None)) or None 4201 4202 def _parse_joins(self) -> t.Iterator[exp.Join]: 4203 return iter(self._parse_join, None) 4204 4205 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4206 if not self._match(TokenType.INTO): 4207 return None 4208 4209 return self.expression( 4210 exp.UnpivotColumns, 4211 this=self._match_text_seq("NAME") and self._parse_column(), 4212 
def _parse_pivot_in(self) -> exp.In:
    """Parse the ``<column> IN (...)`` part of a PIVOT's FOR clause.

    The parenthesised list is either ``ANY [ORDER BY ...]`` or a
    comma-separated list of optionally aliased expressions.

    Returns:
        An `exp.In` node whose ``this`` is the pivoted column and whose
        ``expressions`` are the parsed list entries.

    Raises:
        Whatever `self.raise_error` raises when ``IN (`` does not follow
        the column.
    """

    def _parse_list_entry() -> t.Optional[exp.Expression]:
        # One entry of the IN list: `<expr> [AS] [<alias>]`
        target = self._parse_select_or_expression()

        self._match(TokenType.ALIAS)
        alias = self._parse_bitwise()
        if not alias:
            return target

        # A column alias with no `db` part is reduced to its inner node
        if isinstance(alias, exp.Column) and not alias.db:
            alias = alias.this

        return self.expression(exp.PivotAlias, this=target, alias=alias)

    pivot_column = self._parse_column()

    if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
        self.raise_error("Expecting IN (")

    entries: t.List[exp.Expression]
    if self._match(TokenType.ANY):
        any_order = self._parse_order()
        entries = ensure_list(exp.PivotAny(this=any_order))
    else:
        entries = self._parse_csv(_parse_list_entry)

    self._match_r_paren()
    return self.expression(exp.In, this=pivot_column, expressions=entries)
t.Optional[exp.Pivot]: 4274 index = self._index 4275 include_nulls = None 4276 4277 if self._match(TokenType.PIVOT): 4278 unpivot = False 4279 elif self._match(TokenType.UNPIVOT): 4280 unpivot = True 4281 4282 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4283 if self._match_text_seq("INCLUDE", "NULLS"): 4284 include_nulls = True 4285 elif self._match_text_seq("EXCLUDE", "NULLS"): 4286 include_nulls = False 4287 else: 4288 return None 4289 4290 expressions = [] 4291 4292 if not self._match(TokenType.L_PAREN): 4293 self._retreat(index) 4294 return None 4295 4296 if unpivot: 4297 expressions = self._parse_csv(self._parse_column) 4298 else: 4299 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4300 4301 if not expressions: 4302 self.raise_error("Failed to parse PIVOT's aggregation list") 4303 4304 if not self._match(TokenType.FOR): 4305 self.raise_error("Expecting FOR") 4306 4307 fields = [] 4308 while True: 4309 field = self._try_parse(self._parse_pivot_in) 4310 if not field: 4311 break 4312 fields.append(field) 4313 4314 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4315 self._parse_bitwise 4316 ) 4317 4318 group = self._parse_group() 4319 4320 self._match_r_paren() 4321 4322 pivot = self.expression( 4323 exp.Pivot, 4324 expressions=expressions, 4325 fields=fields, 4326 unpivot=unpivot, 4327 include_nulls=include_nulls, 4328 default_on_null=default_on_null, 4329 group=group, 4330 ) 4331 4332 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4333 pivot.set("alias", self._parse_table_alias()) 4334 4335 if not unpivot: 4336 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4337 4338 columns: t.List[exp.Expression] = [] 4339 all_fields = [] 4340 for pivot_field in pivot.fields: 4341 pivot_field_expressions = pivot_field.expressions 4342 4343 # The `PivotAny` expression corresponds to `ANY 
ORDER BY <column>`; we can't infer in this case. 4344 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4345 continue 4346 4347 all_fields.append( 4348 [ 4349 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4350 for fld in pivot_field_expressions 4351 ] 4352 ) 4353 4354 if all_fields: 4355 if names: 4356 all_fields.append(names) 4357 4358 # Generate all possible combinations of the pivot columns 4359 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4360 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4361 for fld_parts_tuple in itertools.product(*all_fields): 4362 fld_parts = list(fld_parts_tuple) 4363 4364 if names and self.PREFIXED_PIVOT_COLUMNS: 4365 # Move the "name" to the front of the list 4366 fld_parts.insert(0, fld_parts.pop(-1)) 4367 4368 columns.append(exp.to_identifier("_".join(fld_parts))) 4369 4370 pivot.set("columns", columns) 4371 4372 return pivot 4373 4374 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4375 return [agg.alias for agg in aggregations if agg.alias] 4376 4377 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4378 if not skip_where_token and not self._match(TokenType.PREWHERE): 4379 return None 4380 4381 return self.expression( 4382 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4383 ) 4384 4385 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4386 if not skip_where_token and not self._match(TokenType.WHERE): 4387 return None 4388 4389 return self.expression( 4390 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4391 ) 4392 4393 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4394 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4395 return None 4396 4397 elements: t.Dict[str, t.Any] = defaultdict(list) 4398 4399 if 
self._match(TokenType.ALL): 4400 elements["all"] = True 4401 elif self._match(TokenType.DISTINCT): 4402 elements["all"] = False 4403 4404 while True: 4405 index = self._index 4406 4407 elements["expressions"].extend( 4408 self._parse_csv( 4409 lambda: None 4410 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4411 else self._parse_assignment() 4412 ) 4413 ) 4414 4415 before_with_index = self._index 4416 with_prefix = self._match(TokenType.WITH) 4417 4418 if self._match(TokenType.ROLLUP): 4419 elements["rollup"].append( 4420 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4421 ) 4422 elif self._match(TokenType.CUBE): 4423 elements["cube"].append( 4424 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4425 ) 4426 elif self._match(TokenType.GROUPING_SETS): 4427 elements["grouping_sets"].append( 4428 self.expression( 4429 exp.GroupingSets, 4430 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4431 ) 4432 ) 4433 elif self._match_text_seq("TOTALS"): 4434 elements["totals"] = True # type: ignore 4435 4436 if before_with_index <= self._index <= before_with_index + 1: 4437 self._retreat(before_with_index) 4438 break 4439 4440 if index == self._index: 4441 break 4442 4443 return self.expression(exp.Group, **elements) # type: ignore 4444 4445 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4446 return self.expression( 4447 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4448 ) 4449 4450 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4451 if self._match(TokenType.L_PAREN): 4452 grouping_set = self._parse_csv(self._parse_column) 4453 self._match_r_paren() 4454 return self.expression(exp.Tuple, expressions=grouping_set) 4455 4456 return self._parse_column() 4457 4458 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4459 if not skip_having_token and not self._match(TokenType.HAVING): 4460 return None 
4461 return self.expression(exp.Having, this=self._parse_assignment()) 4462 4463 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4464 if not self._match(TokenType.QUALIFY): 4465 return None 4466 return self.expression(exp.Qualify, this=self._parse_assignment()) 4467 4468 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4469 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4470 exp.Prior, this=self._parse_bitwise() 4471 ) 4472 connect = self._parse_assignment() 4473 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4474 return connect 4475 4476 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4477 if skip_start_token: 4478 start = None 4479 elif self._match(TokenType.START_WITH): 4480 start = self._parse_assignment() 4481 else: 4482 return None 4483 4484 self._match(TokenType.CONNECT_BY) 4485 nocycle = self._match_text_seq("NOCYCLE") 4486 connect = self._parse_connect_with_prior() 4487 4488 if not start and self._match(TokenType.START_WITH): 4489 start = self._parse_assignment() 4490 4491 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4492 4493 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4494 this = self._parse_id_var(any_token=True) 4495 if self._match(TokenType.ALIAS): 4496 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4497 return this 4498 4499 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4500 if self._match_text_seq("INTERPOLATE"): 4501 return self._parse_wrapped_csv(self._parse_name_as_expression) 4502 return None 4503 4504 def _parse_order( 4505 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4506 ) -> t.Optional[exp.Expression]: 4507 siblings = None 4508 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4509 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4510 return this 4511 4512 siblings = True 4513 4514 return 
self.expression( 4515 exp.Order, 4516 this=this, 4517 expressions=self._parse_csv(self._parse_ordered), 4518 siblings=siblings, 4519 ) 4520 4521 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4522 if not self._match(token): 4523 return None 4524 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4525 4526 def _parse_ordered( 4527 self, parse_method: t.Optional[t.Callable] = None 4528 ) -> t.Optional[exp.Ordered]: 4529 this = parse_method() if parse_method else self._parse_assignment() 4530 if not this: 4531 return None 4532 4533 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4534 this = exp.var("ALL") 4535 4536 asc = self._match(TokenType.ASC) 4537 desc = self._match(TokenType.DESC) or (asc and False) 4538 4539 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4540 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4541 4542 nulls_first = is_nulls_first or False 4543 explicitly_null_ordered = is_nulls_first or is_nulls_last 4544 4545 if ( 4546 not explicitly_null_ordered 4547 and ( 4548 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4549 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4550 ) 4551 and self.dialect.NULL_ORDERING != "nulls_are_last" 4552 ): 4553 nulls_first = True 4554 4555 if self._match_text_seq("WITH", "FILL"): 4556 with_fill = self.expression( 4557 exp.WithFill, 4558 **{ # type: ignore 4559 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4560 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4561 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4562 "interpolate": self._parse_interpolate(), 4563 }, 4564 ) 4565 else: 4566 with_fill = None 4567 4568 return self.expression( 4569 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4570 ) 4571 4572 def _parse_limit_options(self) -> exp.LimitOptions: 4573 percent = self._match(TokenType.PERCENT) 4574 rows = 
def _parse_limit(
    self,
    this: t.Optional[exp.Expression] = None,
    top: bool = False,
    skip_limit_token: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a LIMIT / TOP clause, or failing that a FETCH clause.

    Args:
        this: The expression the clause applies to; returned unchanged when
            no clause is present.
        top: Look for TOP instead of LIMIT.
        skip_limit_token: Assume the introducing keyword was already consumed.

    Returns:
        An `exp.Limit` or `exp.Fetch` node, or ``this`` if neither matched.
    """
    if not skip_limit_token and not self._match(TokenType.TOP if top else TokenType.LIMIT):
        # Neither LIMIT nor TOP: the only remaining option is FETCH
        if not self._match(TokenType.FETCH):
            return this

        matched_direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
        direction = self._prev.text.upper() if matched_direction else "FIRST"

        count = self._parse_field(tokens=self.FETCH_TOKENS)
        fetch_options = self._parse_limit_options()

        return self.expression(
            exp.Fetch,
            direction=direction,
            count=count,
            limit_options=fetch_options,
        )

    comments = self._prev_comments
    limit_options = None

    if not top:
        expression = self._parse_term()
    elif self._match(TokenType.L_PAREN):
        # TOP (<term>) [options]
        expression = self._parse_term()
        self._match_r_paren()
        limit_options = self._parse_limit_options()
    else:
        # TOP <number> [options]
        expression = self._parse_number()
        limit_options = self._parse_limit_options()

    offset = None
    if self._match(TokenType.COMMA):
        # `<offset>, <count>`: the first value read was actually the offset
        offset, expression = expression, self._parse_term()

    limit_by = self._parse_limit_by()

    return self.expression(
        exp.Limit,
        this=this,
        expression=expression,
        offset=offset,
        comments=comments,
        limit_options=limit_options,
        expressions=limit_by,
    )
expressions=self._parse_limit_by() 4641 ) 4642 4643 def _can_parse_limit_or_offset(self) -> bool: 4644 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4645 return False 4646 4647 index = self._index 4648 result = bool( 4649 self._try_parse(self._parse_limit, retreat=True) 4650 or self._try_parse(self._parse_offset, retreat=True) 4651 ) 4652 self._retreat(index) 4653 return result 4654 4655 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4656 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4657 4658 def _parse_locks(self) -> t.List[exp.Lock]: 4659 locks = [] 4660 while True: 4661 if self._match_text_seq("FOR", "UPDATE"): 4662 update = True 4663 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4664 "LOCK", "IN", "SHARE", "MODE" 4665 ): 4666 update = False 4667 else: 4668 break 4669 4670 expressions = None 4671 if self._match_text_seq("OF"): 4672 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4673 4674 wait: t.Optional[bool | exp.Expression] = None 4675 if self._match_text_seq("NOWAIT"): 4676 wait = True 4677 elif self._match_text_seq("WAIT"): 4678 wait = self._parse_primary() 4679 elif self._match_text_seq("SKIP", "LOCKED"): 4680 wait = False 4681 4682 locks.append( 4683 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4684 ) 4685 4686 return locks 4687 4688 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4689 start = self._index 4690 _, side_token, kind_token = self._parse_join_parts() 4691 4692 side = side_token.text if side_token else None 4693 kind = kind_token.text if kind_token else None 4694 4695 if not self._match_set(self.SET_OPERATIONS): 4696 self._retreat(start) 4697 return None 4698 4699 token_type = self._prev.token_type 4700 4701 if token_type == TokenType.UNION: 4702 operation: t.Type[exp.SetOperation] = exp.Union 4703 elif token_type == TokenType.EXCEPT: 4704 operation = 
exp.Except 4705 else: 4706 operation = exp.Intersect 4707 4708 comments = self._prev.comments 4709 4710 if self._match(TokenType.DISTINCT): 4711 distinct: t.Optional[bool] = True 4712 elif self._match(TokenType.ALL): 4713 distinct = False 4714 else: 4715 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4716 if distinct is None: 4717 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4718 4719 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4720 "STRICT", "CORRESPONDING" 4721 ) 4722 if self._match_text_seq("CORRESPONDING"): 4723 by_name = True 4724 if not side and not kind: 4725 kind = "INNER" 4726 4727 on_column_list = None 4728 if by_name and self._match_texts(("ON", "BY")): 4729 on_column_list = self._parse_wrapped_csv(self._parse_column) 4730 4731 expression = self._parse_select(nested=True, parse_set_operation=False) 4732 4733 return self.expression( 4734 operation, 4735 comments=comments, 4736 this=this, 4737 distinct=distinct, 4738 by_name=by_name, 4739 expression=expression, 4740 side=side, 4741 kind=kind, 4742 on=on_column_list, 4743 ) 4744 4745 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4746 while this: 4747 setop = self.parse_set_operation(this) 4748 if not setop: 4749 break 4750 this = setop 4751 4752 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4753 expression = this.expression 4754 4755 if expression: 4756 for arg in self.SET_OP_MODIFIERS: 4757 expr = expression.args.get(arg) 4758 if expr: 4759 this.set(arg, expr.pop()) 4760 4761 return this 4762 4763 def _parse_expression(self) -> t.Optional[exp.Expression]: 4764 return self._parse_alias(self._parse_assignment()) 4765 4766 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4767 this = self._parse_disjunction() 4768 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4769 # This allows us to parse <non-identifier token> := <expr> 4770 
this = exp.column( 4771 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4772 ) 4773 4774 while self._match_set(self.ASSIGNMENT): 4775 if isinstance(this, exp.Column) and len(this.parts) == 1: 4776 this = this.this 4777 4778 this = self.expression( 4779 self.ASSIGNMENT[self._prev.token_type], 4780 this=this, 4781 comments=self._prev_comments, 4782 expression=self._parse_assignment(), 4783 ) 4784 4785 return this 4786 4787 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4788 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4789 4790 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4791 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4792 4793 def _parse_equality(self) -> t.Optional[exp.Expression]: 4794 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4795 4796 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4797 return self._parse_tokens(self._parse_range, self.COMPARISON) 4798 4799 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4800 this = this or self._parse_bitwise() 4801 negate = self._match(TokenType.NOT) 4802 4803 if self._match_set(self.RANGE_PARSERS): 4804 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4805 if not expression: 4806 return this 4807 4808 this = expression 4809 elif self._match(TokenType.ISNULL): 4810 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4811 4812 # Postgres supports ISNULL and NOTNULL for conditions. 
def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse what follows an already-consumed IS keyword.

    Handles ``IS [NOT] DISTINCT FROM <expr>``, ``IS [NOT] JSON ...`` and
    ``IS [NOT] <primary | NULL>``.

    Args:
        this: The left-hand operand of IS.

    Returns:
        The resulting predicate, or None (after rewinding to just before
        IS) when no right-hand operand could be parsed.
    """
    # One token back from the current position, i.e. the IS keyword itself
    before_is = self._index - 1
    negated = self._match(TokenType.NOT)

    if self._match_text_seq("DISTINCT", "FROM"):
        # IS NOT DISTINCT FROM maps to NullSafeEQ, IS DISTINCT FROM to NullSafeNEQ
        if negated:
            null_safe_class = exp.NullSafeEQ
        else:
            null_safe_class = exp.NullSafeNEQ
        return self.expression(null_safe_class, this=this, expression=self._parse_bitwise())

    operand: t.Optional[exp.Expression]
    if not self._match(TokenType.JSON):
        operand = self._parse_primary() or self._parse_null()
        if not operand:
            self._retreat(before_is)
            return None
    else:
        json_kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

        # Tri-state: True for WITH, False for WITHOUT, None when neither is given
        with_keys = None
        if self._match_text_seq("WITH"):
            with_keys = True
        elif self._match_text_seq("WITHOUT"):
            with_keys = False

        unique_keys = self._match(TokenType.UNIQUE)
        self._match_text_seq("KEYS")
        operand = self.expression(
            exp.JSON, **{"this": json_kind, "with": with_keys, "unique": unique_keys}
        )

    predicate = self.expression(exp.Is, this=this, expression=operand)
    if negated:
        return self.expression(exp.Not, this=predicate)
    return predicate
def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
    """Parse an interval expression into the canonical `INTERVAL '<value>' <unit>` shape.

    Args:
        match_interval: When True, the INTERVAL token is required and None is returned if
            it is absent. The recursive call at the bottom passes False so that the
            follow-up parts of a multi-part interval (which carry no keyword) can be parsed.

    Returns:
        An exp.Interval, an exp.Add combining several consecutive intervals, or None (with
        the token position restored) when the tokens do not form an interval.
    """
    index = self._index

    if not self._match(TokenType.INTERVAL) and match_interval:
        return None

    # A leading string literal is parsed on its own rather than as a full term
    # NOTE(review): presumably so a following "+ '<val>' <unit>" part is not absorbed into
    # the value as arithmetic (see the sum-of-intervals handling below) -- confirm
    if self._match(TokenType.STRING, advance=False):
        this = self._parse_primary()
    else:
        this = self._parse_term()

    # Bail out if there is no value, or if the "value" is a bare, unquoted, unqualified
    # column named IS
    # NOTE(review): the IS case looks like INTERVAL being used as an identifier, as in
    # `interval IS ...` -- confirm
    if not this or (
        isinstance(this, exp.Column)
        and not this.table
        and not this.this.quoted
        and this.name.upper() == "IS"
    ):
        self._retreat(index)
        return None

    # The unit is either a function call or, unless an ALIAS token comes next, any token
    # taken as an upper-cased var
    unit = self._parse_function() or (
        not self._match(TokenType.ALIAS, advance=False)
        and self._parse_var(any_token=True, upper=True)
    )

    # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
    # each INTERVAL expression into this canonical form so it's easy to transpile
    if this and this.is_number:
        this = exp.Literal.string(this.to_py())
    elif this and this.is_string:
        parts = exp.INTERVAL_STRING_RE.findall(this.name)
        if parts and unit:
            # Unconsume the eagerly-parsed unit, since the real unit was part of the string
            # NOTE(review): steps back exactly one token, i.e. assumes the unit was a single
            # token -- confirm this also holds when the unit was parsed as a function
            unit = None
            self._retreat(self._index - 1)

        # Only a single "<value> <unit>" pair inside the string is split apart; strings with
        # zero or several pairs are kept as they are
        if len(parts) == 1:
            this = exp.Literal.string(parts[0][0])
            unit = self.expression(exp.Var, this=parts[0][1].upper())
    if self.INTERVAL_SPANS and self._match_text_seq("TO"):
        unit = self.expression(
            exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
        )

    interval = self.expression(exp.Interval, this=this, unit=unit)

    # Remember the position before the optional "+" so it can be handed back if no further
    # interval part follows
    index = self._index
    self._match(TokenType.PLUS)

    # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
    if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
        return self.expression(
            exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
        )

    self._retreat(index)
    return interval
def _parse_factor(self) -> t.Optional[exp.Expression]:
    """Parse a left-associative chain of operators listed in `self.FACTOR`.

    Operands are parsed with `_parse_exponent` when `self.EXPONENT` is non-empty and with
    `_parse_unary` otherwise. Every exp.Div node produced is annotated with the dialect's
    TYPED_DIVISION and SAFE_DIVISION settings.

    Returns:
        The folded expression, or whatever the first operand parse returned when no factor
        operator follows it.
    """
    if self.EXPONENT:
        parse_operand = self._parse_exponent
    else:
        parse_operand = self._parse_unary

    left = parse_operand()

    while self._match_set(self.FACTOR):
        operator_cls = self.FACTOR[self._prev.token_type]
        operator_comments = self._prev_comments
        right = parse_operand()

        # An integer-division operator spelled as a word that has no right operand is handed
        # back to the caller instead of producing a half-built node
        # NOTE(review): presumably the word (e.g. DIV) was meant as an identifier/alias -- confirm
        dangling_word_operator = (
            not right and operator_cls is exp.IntDiv and self._prev.text.isalpha()
        )
        if dangling_word_operator:
            self._retreat(self._index - 1)
            return left

        left = self.expression(
            operator_cls, this=left, comments=operator_comments, expression=right
        )

        if isinstance(left, exp.Div):
            division_args = left.args
            division_args["typed"] = self.dialect.TYPED_DIVISION
            division_args["safe"] = self.dialect.SAFE_DIVISION

    return left
def _parse_type(
    self, parse_interval: bool = True, fallback_to_identifier: bool = False
) -> t.Optional[exp.Expression]:
    """Parse an interval, a typed literal / inline type constructor, or fall back to a column.

    Args:
        parse_interval: When True, an interval (see `_parse_interval`) is tried first.
        fallback_to_identifier: When True and no interval / type-based construct was
            parsed, return `_parse_id_var()` instead of parsing a column.

    Returns:
        One of: the interval; the Cast returned by `_parse_types` with column ops applied;
        the result of a TYPE_LITERAL_PARSERS callback or an exp.Cast for `<type> <literal>`;
        the data type itself (with column ops applied) when it carries explicitly parsed
        expressions; otherwise an identifier or column, which may be None.
    """
    interval = parse_interval and self._parse_interval()
    if interval:
        return interval

    index = self._index
    data_type = self._parse_types(check_func=True, allow_identifiers=False)

    # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
    # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
    if isinstance(data_type, exp.Cast):
        # This constructor can contain ops directly after it, for instance struct unnesting:
        # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
        return self._parse_column_ops(data_type)

    if data_type:
        # Position right after the type, i.e. before the primary that is attempted next
        index2 = self._index
        this = self._parse_primary()

        # <type> <literal>, e.g. a type keyword directly followed by a string or number
        if isinstance(this, exp.Literal):
            this = self._parse_column_ops(this)

            # Some types build a dedicated node for their literal form instead of a Cast
            parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
            if parser:
                return parser(self, this, data_type)

            return self.expression(exp.Cast, this=this, to=data_type)

        # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
        # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
        #
        # If the index difference here is greater than 1, that means the parser itself must have
        # consumed additional tokens such as the DECIMAL scale and precision in the above example.
        #
        # If it's not greater than 1, then it must be 1, because we've consumed at least the type
        # keyword, meaning that the expressions arg of the DataType must have gotten set by a
        # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
        # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
        #
        # In these cases, we don't really want to return the converted type, but instead retreat
        # and try to parse a Column or Identifier in the section below.
        if data_type.expressions and index2 - index > 1:
            # Undo the primary attempt above, keeping only the tokens of the type itself
            self._retreat(index2)
            return self._parse_column_ops(data_type)

        # Not a usable type here: rewind to the start and reparse as identifier / column
        self._retreat(index)

    if fallback_to_identifier:
        return self._parse_id_var()

    this = self._parse_column()
    return this and self._parse_column_ops(this)
self._retreat(self._index - 1) 5136 return None 5137 else: 5138 return None 5139 5140 type_token = self._prev.token_type 5141 5142 if type_token == TokenType.PSEUDO_TYPE: 5143 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5144 5145 if type_token == TokenType.OBJECT_IDENTIFIER: 5146 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5147 5148 # https://materialize.com/docs/sql/types/map/ 5149 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5150 key_type = self._parse_types( 5151 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5152 ) 5153 if not self._match(TokenType.FARROW): 5154 self._retreat(index) 5155 return None 5156 5157 value_type = self._parse_types( 5158 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5159 ) 5160 if not self._match(TokenType.R_BRACKET): 5161 self._retreat(index) 5162 return None 5163 5164 return exp.DataType( 5165 this=exp.DataType.Type.MAP, 5166 expressions=[key_type, value_type], 5167 nested=True, 5168 prefix=prefix, 5169 ) 5170 5171 nested = type_token in self.NESTED_TYPE_TOKENS 5172 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5173 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5174 expressions = None 5175 maybe_func = False 5176 5177 if self._match(TokenType.L_PAREN): 5178 if is_struct: 5179 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5180 elif nested: 5181 expressions = self._parse_csv( 5182 lambda: self._parse_types( 5183 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5184 ) 5185 ) 5186 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5187 this = expressions[0] 5188 this.set("nullable", True) 5189 self._match_r_paren() 5190 return this 5191 elif type_token in self.ENUM_TYPE_TOKENS: 5192 expressions = self._parse_csv(self._parse_equality) 5193 elif is_aggregate: 5194 func_or_ident = self._parse_function(anonymous=True) or 
self._parse_id_var( 5195 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5196 ) 5197 if not func_or_ident: 5198 return None 5199 expressions = [func_or_ident] 5200 if self._match(TokenType.COMMA): 5201 expressions.extend( 5202 self._parse_csv( 5203 lambda: self._parse_types( 5204 check_func=check_func, 5205 schema=schema, 5206 allow_identifiers=allow_identifiers, 5207 ) 5208 ) 5209 ) 5210 else: 5211 expressions = self._parse_csv(self._parse_type_size) 5212 5213 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5214 if type_token == TokenType.VECTOR and len(expressions) == 2: 5215 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5216 5217 if not expressions or not self._match(TokenType.R_PAREN): 5218 self._retreat(index) 5219 return None 5220 5221 maybe_func = True 5222 5223 values: t.Optional[t.List[exp.Expression]] = None 5224 5225 if nested and self._match(TokenType.LT): 5226 if is_struct: 5227 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5228 else: 5229 expressions = self._parse_csv( 5230 lambda: self._parse_types( 5231 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5232 ) 5233 ) 5234 5235 if not self._match(TokenType.GT): 5236 self.raise_error("Expecting >") 5237 5238 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5239 values = self._parse_csv(self._parse_assignment) 5240 if not values and is_struct: 5241 values = None 5242 self._retreat(self._index - 1) 5243 else: 5244 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5245 5246 if type_token in self.TIMESTAMPS: 5247 if self._match_text_seq("WITH", "TIME", "ZONE"): 5248 maybe_func = False 5249 tz_type = ( 5250 exp.DataType.Type.TIMETZ 5251 if type_token in self.TIMES 5252 else exp.DataType.Type.TIMESTAMPTZ 5253 ) 5254 this = exp.DataType(this=tz_type, expressions=expressions) 5255 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5256 maybe_func = False 5257 this 
= exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5258 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5259 maybe_func = False 5260 elif type_token == TokenType.INTERVAL: 5261 unit = self._parse_var(upper=True) 5262 if unit: 5263 if self._match_text_seq("TO"): 5264 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5265 5266 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5267 else: 5268 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5269 elif type_token == TokenType.VOID: 5270 this = exp.DataType(this=exp.DataType.Type.NULL) 5271 5272 if maybe_func and check_func: 5273 index2 = self._index 5274 peek = self._parse_string() 5275 5276 if not peek: 5277 self._retreat(index) 5278 return None 5279 5280 self._retreat(index2) 5281 5282 if not this: 5283 if self._match_text_seq("UNSIGNED"): 5284 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5285 if not unsigned_type_token: 5286 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5287 5288 type_token = unsigned_type_token or type_token 5289 5290 this = exp.DataType( 5291 this=exp.DataType.Type[type_token.value], 5292 expressions=expressions, 5293 nested=nested, 5294 prefix=prefix, 5295 ) 5296 5297 # Empty arrays/structs are allowed 5298 if values is not None: 5299 cls = exp.Struct if is_struct else exp.Array 5300 this = exp.cast(cls(expressions=values), this, copy=False) 5301 5302 elif expressions: 5303 this.set("expressions", expressions) 5304 5305 # https://materialize.com/docs/sql/types/list/#type-name 5306 while self._match(TokenType.LIST): 5307 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5308 5309 index = self._index 5310 5311 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5312 matched_array = self._match(TokenType.ARRAY) 5313 5314 while self._curr: 5315 datatype_token = self._prev.token_type 5316 
def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
    """Parse one member of a struct type, e.g. `name TYPE`, `name: TYPE` or a bare `TYPE`.

    Args:
        type_required: When True and the member parsed so far is not an exp.DataType and is
            not followed by a type token, the position is rewound and the member is
            reparsed purely as a type via `_parse_types()`.

    Returns:
        The result of `_parse_column_def` for the parsed member, or the result of
        `_parse_types()` in the rewind case described above.
    """
    start = self._index
    curr, upcoming = self._curr, self._next

    # Two type tokens in a row, as in `STRUCT<list ARRAY<...>>`: the first one is really the
    # member's name, so it must be read as an identifier rather than as a type
    name_is_type_keyword = bool(
        curr
        and upcoming
        and curr.token_type in self.TYPE_TOKENS
        and upcoming.token_type in self.TYPE_TOKENS
    )

    if name_is_type_keyword:
        member = self._parse_id_var()
    else:
        member = self._parse_type(parse_interval=False, fallback_to_identifier=True)
        if not member:
            member = self._parse_id_var()

    # Optional separator between the member's name and its type
    self._match(TokenType.COLON)

    if not type_required or isinstance(member, exp.DataType):
        return self._parse_column_def(member)

    if self._match_set(self.TYPE_TOKENS, advance=False):
        return self._parse_column_def(member)

    # A type is mandatory but none follows what was parsed: start over and parse a type
    self._retreat(start)
    return self._parse_types()
def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Apply trailing bracket accesses and `COLUMN_OPERATORS` to `this`.

    Loops for as long as the next token is a key of `self.COLUMN_OPERATORS`, parsing the
    operand that follows each operator and folding it into `this`.

    Args:
        this: The expression parsed so far; may be None.

    Returns:
        The combined expression, additionally passed through
        `_parse_colon_as_variant_extract` when `self.COLON_IS_VARIANT_EXTRACT` is set.

    Raises:
        Whatever `self.raise_error` raises when a DCOLON / DOTCOLON operator is not followed
        by a type.
    """
    this = self._parse_bracket(this)

    while self._match_set(self.COLUMN_OPERATORS):
        op_token = self._prev.token_type
        # A mapping value of None means the operator has no dedicated builder, in which case
        # the generic column / dot handling further down applies
        op = self.COLUMN_OPERATORS.get(op_token)

        if op_token in (TokenType.DCOLON, TokenType.DOTCOLON):
            # Cast-style operators are followed by a type
            field = self._parse_dcolon()
            if not field:
                self.raise_error("Expected type")
        elif op and self._curr:
            field = self._parse_column_reference() or self._parse_bracket()
            # Let the right-hand column absorb its own dotted path before applying `op`
            if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                field = self._parse_column_ops(field)
        else:
            field = self._parse_field(any_token=True, anonymous_func=True)

        if isinstance(field, (exp.Func, exp.Window)) and this:
            # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            # Every Column in the prefix is rewritten to its identifier, or to a Dot of
            # table and identifier when it has a table part
            this = exp.replace_tree(
                this,
                lambda n: (
                    self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                    if n.table
                    else n.this
                )
                if isinstance(n, exp.Column)
                else n,
            )

        if op:
            this = op(self, this, field)
        elif isinstance(this, exp.Column) and not this.args.get("catalog"):
            # Extend the qualified name by one level: every existing part shifts one slot up
            # (this -> table -> db -> catalog) and the new field becomes the column name
            this = self.expression(
                exp.Column,
                comments=this.comments,
                this=field,
                table=this.this,
                db=this.args.get("table"),
                catalog=this.args.get("db"),
            )
        elif isinstance(field, exp.Window):
            # Move the exp.Dot's to the window's function
            window_func = self.expression(exp.Dot, this=this, expression=field.this)
            field.set("this", window_func)
            this = field
        else:
            this = self.expression(exp.Dot, this=this, expression=field)

        # Carry the operand's comments over to the combined expression
        if field and field.comments:
            t.cast(exp.Expression, this).add_comments(field.pop_comments())

        this = self._parse_bracket(this)

    return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this
def _parse_field(
    self,
    any_token: bool = False,
    tokens: t.Optional[t.Collection[TokenType]] = None,
    anonymous_func: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a field: a primary, a function call, or an identifier / variable.

    Args:
        any_token: Forwarded to `_parse_function` and `_parse_id_var`.
        tokens: Forwarded to `_parse_id_var`.
        anonymous_func: When True, a function call is attempted before a primary (and is
            parsed with `anonymous=True`); otherwise the primary is attempted first.

    Returns:
        The first truthy result among the attempts, or the result of `_parse_id_var` when
        both the primary and the function attempts come back empty.
    """

    def parse_as_function() -> t.Optional[exp.Expression]:
        return self._parse_function(anonymous=anonymous_func, any_token=any_token)

    if anonymous_func:
        attempts = (parse_as_function, self._parse_primary)
    else:
        attempts = (self._parse_primary, parse_as_function)

    for attempt in attempts:
        parsed = attempt()
        if parsed:
            return parsed

    return self._parse_id_var(any_token=any_token, tokens=tokens)
5664 parser = self.FUNCTION_PARSERS.get(upper) 5665 if parser and not anonymous: 5666 this = parser(self) 5667 else: 5668 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5669 5670 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5671 this = self.expression( 5672 subquery_predicate, comments=comments, this=self._parse_select() 5673 ) 5674 self._match_r_paren() 5675 return this 5676 5677 if functions is None: 5678 functions = self.FUNCTIONS 5679 5680 function = functions.get(upper) 5681 known_function = function and not anonymous 5682 5683 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5684 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5685 5686 post_func_comments = self._curr and self._curr.comments 5687 if known_function and post_func_comments: 5688 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5689 # call we'll construct it as exp.Anonymous, even if it's "known" 5690 if any( 5691 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5692 for comment in post_func_comments 5693 ): 5694 known_function = False 5695 5696 if alias and known_function: 5697 args = self._kv_to_prop_eq(args) 5698 5699 if known_function: 5700 func_builder = t.cast(t.Callable, function) 5701 5702 if "dialect" in func_builder.__code__.co_varnames: 5703 func = func_builder(args, dialect=self.dialect) 5704 else: 5705 func = func_builder(args) 5706 5707 func = self.validate_expression(func, args) 5708 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5709 func.meta["name"] = this 5710 5711 this = func 5712 else: 5713 if token_type == TokenType.IDENTIFIER: 5714 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5715 this = self.expression(exp.Anonymous, this=this, expressions=args) 5716 5717 if isinstance(this, exp.Expression): 5718 this.add_comments(comments) 5719 5720 self._match_r_paren(this) 5721 return self._parse_window(this) 5722 5723 def 
def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
    """Normalize key-value style arguments into exp.PropertyEQ nodes.

    Items that are instances of `self.KEY_VALUE_DEFINITIONS` are converted to exp.PropertyEQ
    (aliases become `alias = value`; other non-PropertyEQ items are rebuilt with their left
    side turned into an identifier). All remaining items go through the `_to_prop_eq` hook
    together with their position.

    Returns:
        A new list with one entry per input expression, in the same order.
    """
    result: t.List[exp.Expression] = []

    for position, item in enumerate(expressions):
        if not isinstance(item, self.KEY_VALUE_DEFINITIONS):
            result.append(self._to_prop_eq(item, position))
            continue

        if isinstance(item, exp.Alias):
            item = self.expression(
                exp.PropertyEQ, this=item.args.get("alias"), expression=item.this
            )

        if not isinstance(item, exp.PropertyEQ):
            key_identifier = exp.to_identifier(item.this.name)
            item = self.expression(
                exp.PropertyEQ, this=key_identifier, expression=item.expression
            )

        # A key that is still a Column is swapped in place for the node it wraps
        key = item.this
        if isinstance(key, exp.Column):
            key.replace(key.this)

        result.append(item)

    return result
def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """Parse a function argument that may be a lambda, falling back to a regular expression.

    A (possibly parenthesized) parameter list followed by one of the `self.LAMBDAS` tokens is
    handed to the matching LAMBDAS builder. Otherwise the position is rewound and the
    argument is parsed as `DISTINCT <exprs>` or as a select / expression, followed by the
    optional respect/ignore nulls, HAVING MAX, ORDER and LIMIT parsers, in that order.

    Args:
        alias: Forwarded to `_parse_select_or_expression`.
    """
    start = self._index

    params: t.List[t.Optional[exp.Expression]]
    if not self._match(TokenType.L_PAREN):
        params = [self._parse_lambda_arg()]
    else:
        params = t.cast(
            t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
        )
        # No closing paren means this was not a parameter list after all
        if not self._match(TokenType.R_PAREN):
            self._retreat(start)

    if self._match_set(self.LAMBDAS):
        build_lambda = self.LAMBDAS[self._prev.token_type]
        return build_lambda(self, params)

    # Not a lambda: discard the speculative parameter parse and start over
    self._retreat(start)

    argument: t.Optional[exp.Expression]
    if self._match(TokenType.DISTINCT):
        distinct_exprs = self._parse_csv(self._parse_assignment)
        argument = self.expression(exp.Distinct, expressions=distinct_exprs)
    else:
        argument = self._parse_select_or_expression(alias=alias)

    argument = self._parse_respect_or_ignore_nulls(argument)
    argument = self._parse_having_max(argument)
    argument = self._parse_order(argument)
    return self._parse_limit(argument)
def _parse_column_def(
    self, this: t.Optional[exp.Expression], computed_column: bool = True
) -> t.Optional[exp.Expression]:
    """Parse the type and constraints following a column name into an exp.ColumnDef.

    Args:
        this: The already-parsed column name; an exp.Column is unwrapped to its identifier.
        computed_column: When False, a leading ALIAS token is consumed and discarded before
            the type is parsed, so it cannot start a computed-column definition.

    Returns:
        An exp.ColumnDef, or `this` unchanged when neither a type nor any constraint was
        found.
    """
    # column defs are not really columns, they're identifiers
    if isinstance(this, exp.Column):
        this = this.this

    if not computed_column:
        self._match(TokenType.ALIAS)

    kind = self._parse_types(schema=True)

    if self._match_text_seq("FOR", "ORDINALITY"):
        return self.expression(exp.ColumnDef, this=this, ordinality=True)

    constraints: t.List[exp.Expression] = []

    # Computed column: either an ALIAS token when no type was parsed, or the literal words
    # ALIAS / MATERIALIZED (with or without a preceding type)
    if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
        ("ALIAS", "MATERIALIZED")
    ):
        # Must be read right away: self._prev is still the token matched by the condition above
        persisted = self._prev.text.upper() == "MATERIALIZED"
        # The keyword arguments are evaluated in order, so the expression is parsed first,
        # then an optional PERSISTED, then an optional NOT NULL
        constraint_kind = exp.ComputedColumnConstraint(
            this=self._parse_assignment(),
            persisted=persisted or self._match_text_seq("PERSISTED"),
            not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
        )
        constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
    elif (
        kind
        and self._match(TokenType.ALIAS, advance=False)
        and (
            not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
            or (self._next and self._next.token_type == TokenType.L_PAREN)
        )
    ):
        # <type> followed by an ALIAS token: a transform constraint. When
        # WRAPPED_TRANSFORM_COLUMN_CONSTRAINT is set, the expression must start with "("
        self._advance()
        constraints.append(
            self.expression(
                exp.ColumnConstraint,
                kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
            )
        )

    # Collect every remaining column constraint
    while True:
        constraint = self._parse_column_constraint()
        if not constraint:
            break
        constraints.append(constraint)

    # Nothing beyond the name was found, so this is not a column definition
    if not kind and not constraints:
        return this

    return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
    """
    Parses an inline length constraint: an optional LENGTH keyword followed by
    the length expression.
    """
    self._match_text_seq("LENGTH")
    length = self._parse_bitwise()
    return self.expression(exp.InlineLengthColumnConstraint, this=length)
def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
    """
    Parses a column constraint, optionally introduced by `CONSTRAINT <name>`.

    Returns:
        An `exp.ColumnConstraint` if one of the `CONSTRAINT_PARSERS` keywords matched.
        Otherwise whatever was parsed for the optional name, which is falsy when
        no CONSTRAINT token was present.
    """
    this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

    # A WITH that is followed by a procedure option is not treated as a constraint
    procedure_option_follows = (
        self._match(TokenType.WITH, advance=False)
        and self._next
        and self._next.text.upper() in self.PROCEDURE_OPTIONS
    )

    if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
        return self.expression(
            exp.ColumnConstraint,
            this=this,
            kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
        )

    return this


def _parse_constraint(self) -> t.Optional[exp.Expression]:
    """
    Parses a schema-level constraint.

    Without a leading CONSTRAINT token, only the `SCHEMA_UNNAMED_CONSTRAINTS` keywords
    are tried. With it, the name and the unnamed constraints that follow are wrapped
    in an `exp.Constraint`.
    """
    if not self._match(TokenType.CONSTRAINT):
        return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

    return self.expression(
        exp.Constraint,
        this=self._parse_id_var(),
        expressions=self._parse_unnamed_constraints(),
    )


def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
    """
    Collects consecutive unnamed constraints, falling back to a function parse for
    each item, until neither yields anything.
    """
    constraints = []
    while True:
        constraint = self._parse_unnamed_constraint() or self._parse_function()
        if not constraint:
            break
        constraints.append(constraint)

    return constraints


def _parse_unnamed_constraint(
    self, constraints: t.Optional[t.Collection[str]] = None
) -> t.Optional[exp.Expression]:
    """
    Parses a single constraint that is not introduced by `CONSTRAINT <name>`.

    Args:
        constraints: The keywords to try; defaults to all of `CONSTRAINT_PARSERS`.

    Returns:
        The result of the matching constraint parser. None if the next token is an
        identifier, if no keyword matched, or if the matched keyword has no entry in
        `CONSTRAINT_PARSERS` (an error is reported in that case).
    """
    if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
        constraints or self.CONSTRAINT_PARSERS
    ):
        return None

    constraint = self._prev.text.upper()
    if constraint not in self.CONSTRAINT_PARSERS:
        self.raise_error(f"No parser found for schema constraint {constraint}.")
        # Other methods of this parser keep going after raise_error, i.e. it can return
        # instead of raising; bail out here so the lookup below cannot fail with KeyError
        return None

    return self.CONSTRAINT_PARSERS[constraint](self)


def _parse_unique_key(self) -> t.Optional[exp.Expression]:
    """Parses the optional key name of a UNIQUE constraint (arbitrary tokens are not accepted)."""
    return self._parse_id_var(any_token=False)
def _parse_key_constraint_options(self) -> t.List[str]:
    """
    Parses the options that may trail a key constraint.

    Two kinds of options are recognized, in any order and any number:
    `ON <token> <action>` clauses, where the action is one of NO ACTION, CASCADE,
    RESTRICT, SET NULL or SET DEFAULT, and the entries of `KEY_CONSTRAINT_OPTIONS`.

    Returns:
        The options as strings, in the order they were parsed.
    """
    options = []
    while True:
        if not self._curr:
            break

        if self._match(TokenType.ON):
            action = None
            # Whatever token follows ON is taken verbatim as the event name
            on = self._advance_any() and self._prev.text

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match_text_seq("CASCADE"):
                action = "CASCADE"
            elif self._match_text_seq("RESTRICT"):
                action = "RESTRICT"
            elif self._match_pair(TokenType.SET, TokenType.NULL):
                action = "SET NULL"
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                action = "SET DEFAULT"
            else:
                self.raise_error("Invalid key constraint")

            options.append(f"ON {on} {action}")
        else:
            var = self._parse_var_from_options(
                self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
            )
            if not var:
                break
            options.append(var.name)

    return options


def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
    """
    Parses a REFERENCES clause: the referenced table (parsed as a schema) followed by
    key constraint options.

    Args:
        match: If True, the REFERENCES token must be matched first and None is
            returned when it is absent. If False, no token is matched here.
    """
    if match and not self._match(TokenType.REFERENCES):
        return None

    expressions = None
    this = self._parse_table(schema=True)
    options = self._parse_key_constraint_options()
    return self.expression(exp.Reference, this=this, expressions=expressions, options=options)


def _parse_foreign_key(self) -> exp.ForeignKey:
    """
    Parses the body of a FOREIGN KEY constraint: an optional wrapped list of column
    names, the REFERENCES clause, and `ON DELETE` / `ON UPDATE` actions.

    The actions are stored on the node under the lower-cased event name. An action is
    NO ACTION, SET NULL, SET DEFAULT, or otherwise the upper-cased text of the next token.
    """
    expressions = (
        self._parse_wrapped_id_vars()
        if not self._match(TokenType.REFERENCES, advance=False)
        else None
    )
    reference = self._parse_references()
    on_options = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        kind = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            # SET must be followed by NULL or DEFAULT; without this check the action
            # would silently be recorded as "SET SET"
            if not self._match_set((TokenType.NULL, TokenType.DEFAULT)):
                self.raise_error("Expected NULL or DEFAULT after SET")
            action = "SET " + self._prev.text.upper()
        else:
            self._advance()
            action = self._prev.text.upper()

        on_options[kind] = action

    return self.expression(
        exp.ForeignKey,
        expressions=expressions,
        reference=reference,
        options=self._parse_key_constraint_options(),
        **on_options,  # type: ignore
    )


def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
    """Parses one element of a primary key list: an ordered expression, else a field."""
    return self._parse_ordered() or self._parse_field()


def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
    """
    Parses the wrapped pair of identifiers that follows a TIMESTAMP_SNAPSHOT token into
    an `exp.PeriodForSystemTimeConstraint`. If that token is missing, the parser is
    moved back by one token and None is returned.
    """
    if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
        self._retreat(self._index - 1)
        return None

    id_vars = self._parse_wrapped_id_vars()
    return self.expression(
        exp.PeriodForSystemTimeConstraint,
        this=seq_get(id_vars, 0),
        expression=seq_get(id_vars, 1),
    )
def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
    """
    Parses one entry inside brackets / braces: an assignment with an optional explicit
    alias, optionally followed by a `:` slice. `is_map` is not used by this implementation.
    """
    entry = self._parse_assignment()
    aliased = self._parse_alias(entry, explicit=True)
    return self._parse_slice(aliased)


def _parse_odbc_datetime_literal(self) -> exp.Expression:
    """
    Parses a datetime literal written in ODBC format. The literal is parsed into the
    expression type registered for its prefix in `ODBC_DATETIME_LITERALS`; for example
    `{d'yyyy-mm-dd'}` is parsed as a `Date`, the same as `DATE('yyyy-mm-dd')`.

    Reference:
    https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
    """
    self._match(TokenType.VAR)
    literal_type = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
    value = self._parse_string()
    literal = self.expression(exp_class=literal_type, this=value)

    if not self._match(TokenType.R_BRACE):
        self.raise_error("Expected }")

    return literal


def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
    """
    Parses a `[...]` or `{...}` group that follows `this`, then recurses to pick up any
    further groups.

    Depending on the opener and on `this`, the result is an ODBC datetime literal, a
    struct (braces), an array constructor (brackets without `this`, or with a `this`
    whose name is in `ARRAY_CONSTRUCTORS`), or an `exp.Bracket` subscript on `this`.
    If neither opener comes next, `this` is returned unchanged.
    """
    if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
        return this

    bracket_kind = self._prev.token_type
    is_brace = bracket_kind == TokenType.L_BRACE
    is_bracket = bracket_kind == TokenType.L_BRACKET

    if (
        is_brace
        and self._curr
        and self._curr.token_type == TokenType.VAR
        and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
    ):
        return self._parse_odbc_datetime_literal()

    expressions = self._parse_csv(lambda: self._parse_bracket_key_value(is_map=is_brace))

    # The closing token has to pair up with the opening one
    if is_bracket:
        if not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
    elif is_brace:
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

    if is_brace:
        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
    elif not this:
        this = build_array_constructor(
            exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
        )
    else:
        constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
        if constructor_type:
            return build_array_constructor(
                constructor_type,
                args=expressions,
                bracket_kind=bracket_kind,
                dialect=self.dialect,
            )

        # Plain subscript: shift the indices by the negated dialect INDEX_OFFSET
        shifted = apply_index_offset(
            this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
        )
        this = self.expression(exp.Bracket, this=this, expressions=shifted)

    self._add_comments(this)
    return self._parse_bracket(this)


def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wraps `this` in an `exp.Slice` if a colon follows; otherwise returns it as is."""
    if not self._match(TokenType.COLON):
        return this

    upper = self._parse_assignment()
    return self.expression(exp.Slice, this=this, expression=upper)


def _parse_case(self) -> t.Optional[exp.Expression]:
    """
    Parses the remainder of a CASE expression: an optional operand, the WHEN / THEN
    branches, an optional ELSE and the closing END.
    """
    leading_comments = self._prev_comments
    operand = self._parse_assignment()

    branches = []
    while self._match(TokenType.WHEN):
        condition = self._parse_assignment()
        self._match(TokenType.THEN)
        outcome = self._parse_assignment()
        branches.append(self.expression(exp.If, this=condition, true=outcome))

    default = self._parse_assignment() if self._match(TokenType.ELSE) else None

    if not self._match(TokenType.END):
        # The ELSE value may have been parsed as an interval whose operand renders as END,
        # which means END was swallowed; in that case the value is a column named "interval"
        end_was_swallowed = (
            isinstance(default, exp.Interval) and default.this.sql().upper() == "END"
        )
        if end_was_swallowed:
            default = exp.column("interval")
        else:
            self.raise_error("Expected END after CASE", self._prev)

    return self.expression(
        exp.Case, comments=leading_comments, this=operand, ifs=branches, default=default
    )
def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
    """
    Parses `VALUE FOR <column> [OVER (<order>)]` into an `exp.NextValueFor`. If
    `VALUE FOR` does not follow, the parser is moved back by one token and None
    is returned.
    """
    if self._match_text_seq("VALUE", "FOR"):
        sequence = self._parse_column()
        order = self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order)
        return self.expression(exp.NextValueFor, this=sequence, order=order)

    self._retreat(self._index - 1)
    return None


def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """
    Parses the arguments of EXTRACT: a unit (a function, else a variable or string that
    is upper-cased), then FROM or a comma, then the expression to extract from.
    """
    unit = self._parse_function() or self._parse_var_or_string(upper=True)

    # FROM is tried first; a comma is only looked for when FROM is absent
    if not (self._match(TokenType.FROM) or self._match(TokenType.COMMA)):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    source = self._parse_bitwise()
    return self.expression(exp.Extract, this=unit, expression=source)


def _parse_gap_fill(self) -> exp.GapFill:
    """
    Parses the arguments of GAP_FILL: an optional TABLE keyword, a table, an optional
    comma and the remaining comma-separated arguments.
    """
    self._match(TokenType.TABLE)
    table = self._parse_table()

    self._match(TokenType.COMMA)
    args = [table]
    args.extend(self._parse_csv(self._parse_lambda))

    return self.validate_expression(exp.GapFill.from_arg_list(args), args)
def _parse_string_agg(self) -> exp.GroupConcat:
    """
    Parses the arguments of STRING_AGG / LISTAGG-like functions into an `exp.GroupConcat`.

    Handles an optional DISTINCT on the first argument, an optional ON OVERFLOW clause,
    an ORDER BY (and LIMIT) inside the call, and a trailing WITHIN GROUP (ORDER BY ...).
    """
    if self._match(TokenType.DISTINCT):
        distinct = self.expression(exp.Distinct, expressions=[self._parse_assignment()])
        args: t.List[t.Optional[exp.Expression]] = [distinct]
        if self._match(TokenType.COMMA):
            args.extend(self._parse_csv(self._parse_assignment))
    else:
        args = self._parse_csv(self._parse_assignment)  # type: ignore

    on_overflow: t.Optional[exp.Expression] = None
    if self._match_text_seq("ON", "OVERFLOW"):
        # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
        if self._match_text_seq("ERROR"):
            on_overflow = exp.var("ERROR")
        else:
            self._match_text_seq("TRUNCATE")
            filler = self._parse_string()
            # The count is included unless WITHOUT COUNT is spelled out
            with_count = self._match_text_seq("WITH", "COUNT") or not self._match_text_seq(
                "WITHOUT", "COUNT"
            )
            on_overflow = self.expression(
                exp.OverflowTruncateBehavior, this=filler, with_count=with_count
            )

    index = self._index
    if not self._match(TokenType.R_PAREN) and args:
        # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
        # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
        # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
        ordered = self._parse_order(this=args[0])
        args[0] = self._parse_limit(this=ordered)
        return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

    # WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]) is parsed "manually" here,
    # instead of letting _parse_window produce an exp.WithinGroup node, so that the call is
    # parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
    if self._match_text_seq("WITHIN", "GROUP"):
        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        within_order = self._parse_order(this=seq_get(args, 0))
        return self.expression(
            exp.GroupConcat,
            this=within_order,
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    self._retreat(index)
    return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)


def _parse_convert(
    self, strict: bool, safe: t.Optional[bool] = None
) -> t.Optional[exp.Expression]:
    """
    Parses the arguments of CONVERT: an expression followed by either `USING <charset>`
    or `, <type>`.

    Args:
        strict: Whether to build an `exp.Cast` (True) or an `exp.TryCast` (False).
        safe: Passed through to the resulting node.
    """
    value = self._parse_bitwise()

    target: t.Optional[exp.Expression] = None
    if self._match(TokenType.USING):
        target = self.expression(exp.CharacterSet, this=self._parse_var())
    elif self._match(TokenType.COMMA):
        target = self._parse_types()

    cast_type = exp.Cast if strict else exp.TryCast
    return self.expression(cast_type, this=value, to=target, safe=safe)


def _parse_xml_table(self) -> exp.XMLTable:
    """
    Parses the arguments of XMLTABLE: optional XMLNAMESPACES(...), a string, an optional
    PASSING [BY VALUE] list, an optional RETURNING SEQUENCE BY REF and an optional
    COLUMNS list.
    """
    namespaces = None
    if self._match_text_seq("XMLNAMESPACES", "("):
        namespaces = self._parse_xml_namespace()
        self._match_text_seq(")", ",")

    xpath = self._parse_string()

    passing = None
    if self._match_text_seq("PASSING"):
        # The BY VALUE keywords are optional and are provided for semantic clarity
        self._match_text_seq("BY", "VALUE")
        passing = self._parse_csv(self._parse_column)

    by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

    columns = self._parse_csv(self._parse_field_def) if self._match_text_seq("COLUMNS") else None

    return self.expression(
        exp.XMLTable,
        this=xpath,
        namespaces=namespaces,
        passing=passing,
        columns=columns,
        by_ref=by_ref,
    )
def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
    """
    Parses the arguments of DECODE, which generally comes in two variants:

    - DECODE(bin, charset)
    - DECODE(expression, search, result [, search, result] ... [, default])

    Calls with fewer than three arguments become an `exp.Decode`. Everything else is
    turned into a CASE expression, where a NULL search value is tested with `IS NULL`
    rather than with equality. None is returned if the first argument or any
    search / result argument is missing.
    """
    args = self._parse_csv(self._parse_assignment)

    if len(args) < 3:
        return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

    subject, *rest = args
    if not subject:
        return None

    branches = []
    for search, result in zip(rest[::2], rest[1::2]):
        if not (search and result):
            return None

        if isinstance(search, exp.Literal):
            condition = exp.EQ(this=subject.copy(), expression=search)
        elif isinstance(search, exp.Null):
            condition = exp.Is(this=subject.copy(), expression=exp.Null())
        else:
            # Arbitrary search expression: matches on equality, or when both sides are NULL
            condition = exp.or_(
                exp.EQ(this=subject.copy(), expression=search),
                exp.and_(
                    exp.Is(this=subject.copy(), expression=exp.Null()),
                    exp.Is(this=search.copy(), expression=exp.Null()),
                    copy=False,
                ),
                copy=False,
            )

        branches.append(exp.If(this=condition, true=result))

    # An odd number of trailing arguments means the last one is the default
    has_default = len(rest) % 2 == 1
    return exp.Case(ifs=branches, default=rest[-1] if has_default else None)
def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wraps `this` in an `exp.FormatJson` if it is set and followed by FORMAT JSON."""
    if this and self._match_text_seq("FORMAT", "JSON"):
        return self.expression(exp.FormatJson, this=this)

    return this


def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
    """
    Parses the `... ON EMPTY`, `... ON ERROR` and `... ON NULL` clauses.

    MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite
    order (e.g. JSON_EXISTS); the dialect's `ON_CONDITION_EMPTY_BEFORE_ERROR` flag decides
    which one is tried first. `ON NULL` is always tried last.

    Returns:
        An `exp.OnCondition`, or None if none of the three clauses was found.
    """
    if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
        first_two = ("EMPTY", "ERROR")
    else:
        first_two = ("ERROR", "EMPTY")

    handling = {
        condition: self._parse_on_handling(condition, *self.ON_CONDITION_TOKENS)
        for condition in (*first_two, "NULL")
    }
    empty, error, null = handling["EMPTY"], handling["ERROR"], handling["NULL"]

    if not (empty or error or null):
        return None

    return self.expression(exp.OnCondition, empty=empty, error=error, null=null)


def _parse_on_handling(
    self, on: str, *values: str
) -> t.Optional[str] | t.Optional[exp.Expression]:
    """
    Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL
    (Oracle, T-SQL, MySQL).

    Args:
        on: The Y part, i.e. the condition being handled.
        values: The texts accepted as the X part.

    Returns:
        The string "X ON Y" for the first X that matches, the parsed default expression
        for the DEFAULT form, or None (after rewinding the parser) if neither applies.
    """
    matched_value = next(
        (value for value in values if self._match_text_seq(value, "ON", on)), None
    )
    if matched_value is not None:
        return f"{matched_value} ON {on}"

    start = self._index
    if self._match(TokenType.DEFAULT):
        fallback = self._parse_bitwise()
        if self._match_text_seq("ON", on):
            return fallback

    self._retreat(start)
    return None


@t.overload
def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...


@t.overload
def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
def _parse_json_object(self, agg=False):
    """
    Parses the arguments of JSON_OBJECT / JSON_OBJECTAGG.

    The arguments are either a star or a comma-separated list of key-value pairs, followed
    by the optional `{NULL | ABSENT} ON NULL`, `{WITH | WITHOUT} UNIQUE [KEYS]`,
    `RETURNING <type>` and `ENCODING <var>` clauses.

    Args:
        agg: Whether to build an `exp.JSONObjectAgg` instead of an `exp.JSONObject`.
    """

    def _parse_entry():
        return self._parse_format_json(self._parse_json_key_value())

    star = self._parse_star()
    if star:
        expressions = [star]
    else:
        expressions = self._parse_csv(_parse_entry)

    null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

    if self._match_text_seq("WITH", "UNIQUE"):
        unique_keys = True
    elif self._match_text_seq("WITHOUT", "UNIQUE"):
        unique_keys = False
    else:
        unique_keys = None

    self._match_text_seq("KEYS")

    return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
        self._parse_type()
    )
    encoding = self._match_text_seq("ENCODING") and self._parse_var()

    node_type = exp.JSONObjectAgg if agg else exp.JSONObject
    return self.expression(
        node_type,
        expressions=expressions,
        null_handling=null_handling,
        unique_keys=unique_keys,
        return_type=return_type,
        encoding=encoding,
    )


# Note: this is currently incomplete; it only implements the "JSON_value_column" part
def _parse_json_column_def(self) -> exp.JSONColumnDef:
    """
    Parses one column of a JSON schema: either `<name> <type> [PATH <string>]` or
    `NESTED [PATH <string>] <nested schema>`.
    """
    if self._match_text_seq("NESTED"):
        this = kind = None
        nested = True
    else:
        this = self._parse_id_var()
        kind = self._parse_types(allow_identifiers=False)
        nested = None

    path = self._match_text_seq("PATH") and self._parse_string()
    nested_schema = nested and self._parse_json_schema()

    return self.expression(
        exp.JSONColumnDef,
        this=this,
        kind=kind,
        path=path,
        nested_schema=nested_schema,
    )


def _parse_json_schema(self) -> exp.JSONSchema:
    """Parses an optional COLUMNS keyword followed by an optionally wrapped list of column defs."""
    self._match_text_seq("COLUMNS")
    column_defs = self._parse_wrapped_csv(self._parse_json_column_def, optional=True)
    return self.expression(exp.JSONSchema, expressions=column_defs)
def _parse_match_against(self) -> exp.MatchAgainst:
    """
    Parses `<columns>) AGAINST (<string> [<modifier>]`, i.e. what follows the opening paren
    of MATCH. The modifier, if any, is stored as one of these strings:
    "IN NATURAL LANGUAGE MODE", "IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION",
    "IN BOOLEAN MODE" or "WITH QUERY EXPANSION".
    """
    columns = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    search = self._parse_string()

    modifier = None
    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        modifier = "IN NATURAL LANGUAGE MODE"
        if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier += " WITH QUERY EXPANSION"
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        modifier = "WITH QUERY EXPANSION"

    return self.expression(
        exp.MatchAgainst, this=search, expressions=columns, modifier=modifier
    )
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """
    Parses the arguments of POSITION-like functions.

    Both `POSITION(<needle> IN <haystack>)` and the comma-separated form are handled.
    In the latter, the third argument is the start position.

    Args:
        haystack_first: Whether the comma-separated form lists the haystack before the needle.
    """
    args = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        haystack = self._parse_bitwise()
        return self.expression(exp.StrPosition, this=haystack, substr=seq_get(args, 0))

    haystack_index, needle_index = (0, 1) if haystack_first else (1, 0)

    return self.expression(
        exp.StrPosition,
        this=seq_get(args, haystack_index),
        substr=seq_get(args, needle_index),
        position=seq_get(args, 2),
    )


def _parse_predict(self) -> exp.Predict:
    """
    Parses the arguments of PREDICT: `[MODEL] <table> [,] [TABLE] <table> [, <params>]`.
    """
    self._match_text_seq("MODEL")
    model = self._parse_table()

    self._match(TokenType.COMMA)
    self._match_text_seq("TABLE")

    source = self._parse_table()
    params_struct = self._match(TokenType.COMMA) and self._parse_bitwise()

    return self.expression(
        exp.Predict, this=model, expression=source, params_struct=params_struct
    )


def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
    """Parses a comma-separated list of tables into a join hint named `func_name` (upper-cased)."""
    tables = self._parse_csv(self._parse_table)
    return exp.JoinHint(this=func_name.upper(), expressions=tables)


def _parse_substring(self) -> exp.Substring:
    """
    Parses the arguments of SUBSTRING, including the Postgres form
    substring(string [from int] [for int]).

    https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
    """
    args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

    if self._match(TokenType.FROM):
        args.append(self._parse_bitwise())

    if self._match(TokenType.FOR):
        # A FOR without a preceding start argument implies a start of 1
        if len(args) == 1:
            args.append(exp.Literal.number(1))
        args.append(self._parse_bitwise())

    substring = exp.Substring.from_arg_list(args)
    return self.validate_expression(substring, args)
def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
    """Parses a comma-separated list of named windows if a WINDOW token comes next."""
    has_window = self._match(TokenType.WINDOW)
    return has_window and self._parse_csv(self._parse_named_window)


def _parse_named_window(self) -> t.Optional[exp.Expression]:
    """Parses an identifier and the window definition it names."""
    name = self._parse_id_var()
    return self._parse_window(name, alias=True)


def _parse_respect_or_ignore_nulls(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """
    Wraps `this` in `exp.IgnoreNulls` / `exp.RespectNulls` if IGNORE NULLS / RESPECT NULLS
    follows (checked in that order); otherwise returns it unchanged.
    """
    for keyword, node_type in (("IGNORE", exp.IgnoreNulls), ("RESPECT", exp.RespectNulls)):
        if self._match_text_seq(keyword, "NULLS"):
            return self.expression(node_type, this=this)

    return this


def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """
    Parses `HAVING {MAX | MIN} <column>` after `this` into an `exp.HavingMax`. The `max`
    arg is False only when the token consumed right after HAVING reads MIN.
    """
    if not self._match(TokenType.HAVING):
        return this

    self._match_texts(("MAX", "MIN"))
    is_max = self._prev.text.upper() != "MIN"
    column = self._parse_column()

    return self.expression(exp.HavingMax, this=this, expression=column, max=is_max)
6838 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6839 if self._match_text_seq("WITHIN", "GROUP"): 6840 order = self._parse_wrapped(self._parse_order) 6841 this = self.expression(exp.WithinGroup, this=this, expression=order) 6842 6843 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6844 self._match(TokenType.WHERE) 6845 this = self.expression( 6846 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6847 ) 6848 self._match_r_paren() 6849 6850 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6851 # Some dialects choose to implement and some do not. 6852 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6853 6854 # There is some code above in _parse_lambda that handles 6855 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6856 6857 # The below changes handle 6858 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6859 6860 # Oracle allows both formats 6861 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6862 # and Snowflake chose to do the same for familiarity 6863 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6864 if isinstance(this, exp.AggFunc): 6865 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6866 6867 if ignore_respect and ignore_respect is not this: 6868 ignore_respect.replace(ignore_respect.this) 6869 this = self.expression(ignore_respect.__class__, this=this) 6870 6871 this = self._parse_respect_or_ignore_nulls(this) 6872 6873 # bigquery select from window x AS (partition by ...) 
6874 if alias: 6875 over = None 6876 self._match(TokenType.ALIAS) 6877 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6878 return this 6879 else: 6880 over = self._prev.text.upper() 6881 6882 if comments and isinstance(func, exp.Expression): 6883 func.pop_comments() 6884 6885 if not self._match(TokenType.L_PAREN): 6886 return self.expression( 6887 exp.Window, 6888 comments=comments, 6889 this=this, 6890 alias=self._parse_id_var(False), 6891 over=over, 6892 ) 6893 6894 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6895 6896 first = self._match(TokenType.FIRST) 6897 if self._match_text_seq("LAST"): 6898 first = False 6899 6900 partition, order = self._parse_partition_and_order() 6901 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6902 6903 if kind: 6904 self._match(TokenType.BETWEEN) 6905 start = self._parse_window_spec() 6906 self._match(TokenType.AND) 6907 end = self._parse_window_spec() 6908 exclude = ( 6909 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6910 if self._match_text_seq("EXCLUDE") 6911 else None 6912 ) 6913 6914 spec = self.expression( 6915 exp.WindowSpec, 6916 kind=kind, 6917 start=start["value"], 6918 start_side=start["side"], 6919 end=end["value"], 6920 end_side=end["side"], 6921 exclude=exclude, 6922 ) 6923 else: 6924 spec = None 6925 6926 self._match_r_paren() 6927 6928 window = self.expression( 6929 exp.Window, 6930 comments=comments, 6931 this=this, 6932 partition_by=partition, 6933 order=order, 6934 spec=spec, 6935 alias=window_alias, 6936 over=over, 6937 first=first, 6938 ) 6939 6940 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
6941 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6942 return self._parse_window(window, alias=alias) 6943 6944 return window 6945 6946 def _parse_partition_and_order( 6947 self, 6948 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6949 return self._parse_partition_by(), self._parse_order() 6950 6951 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6952 self._match(TokenType.BETWEEN) 6953 6954 return { 6955 "value": ( 6956 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6957 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6958 or self._parse_bitwise() 6959 ), 6960 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6961 } 6962 6963 def _parse_alias( 6964 self, this: t.Optional[exp.Expression], explicit: bool = False 6965 ) -> t.Optional[exp.Expression]: 6966 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6967 # so this section tries to parse the clause version and if it fails, it treats the token 6968 # as an identifier (alias) 6969 if self._can_parse_limit_or_offset(): 6970 return this 6971 6972 any_token = self._match(TokenType.ALIAS) 6973 comments = self._prev_comments or [] 6974 6975 if explicit and not any_token: 6976 return this 6977 6978 if self._match(TokenType.L_PAREN): 6979 aliases = self.expression( 6980 exp.Aliases, 6981 comments=comments, 6982 this=this, 6983 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6984 ) 6985 self._match_r_paren(aliases) 6986 return aliases 6987 6988 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6989 self.STRING_ALIASES and self._parse_string_as_identifier() 6990 ) 6991 6992 if alias: 6993 comments.extend(alias.pop_comments()) 6994 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6995 column = this.this 6996 6997 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6998 if not this.comments and 
column and column.comments: 6999 this.comments = column.pop_comments() 7000 7001 return this 7002 7003 def _parse_id_var( 7004 self, 7005 any_token: bool = True, 7006 tokens: t.Optional[t.Collection[TokenType]] = None, 7007 ) -> t.Optional[exp.Expression]: 7008 expression = self._parse_identifier() 7009 if not expression and ( 7010 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7011 ): 7012 quoted = self._prev.token_type == TokenType.STRING 7013 expression = self._identifier_expression(quoted=quoted) 7014 7015 return expression 7016 7017 def _parse_string(self) -> t.Optional[exp.Expression]: 7018 if self._match_set(self.STRING_PARSERS): 7019 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7020 return self._parse_placeholder() 7021 7022 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7023 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7024 if output: 7025 output.update_positions(self._prev) 7026 return output 7027 7028 def _parse_number(self) -> t.Optional[exp.Expression]: 7029 if self._match_set(self.NUMERIC_PARSERS): 7030 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7031 return self._parse_placeholder() 7032 7033 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7034 if self._match(TokenType.IDENTIFIER): 7035 return self._identifier_expression(quoted=True) 7036 return self._parse_placeholder() 7037 7038 def _parse_var( 7039 self, 7040 any_token: bool = False, 7041 tokens: t.Optional[t.Collection[TokenType]] = None, 7042 upper: bool = False, 7043 ) -> t.Optional[exp.Expression]: 7044 if ( 7045 (any_token and self._advance_any()) 7046 or self._match(TokenType.VAR) 7047 or (self._match_set(tokens) if tokens else False) 7048 ): 7049 return self.expression( 7050 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7051 ) 7052 return self._parse_placeholder() 7053 7054 def _advance_any(self, 
ignore_reserved: bool = False) -> t.Optional[Token]: 7055 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7056 self._advance() 7057 return self._prev 7058 return None 7059 7060 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7061 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7062 7063 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7064 return self._parse_primary() or self._parse_var(any_token=True) 7065 7066 def _parse_null(self) -> t.Optional[exp.Expression]: 7067 if self._match_set(self.NULL_TOKENS): 7068 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7069 return self._parse_placeholder() 7070 7071 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7072 if self._match(TokenType.TRUE): 7073 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7074 if self._match(TokenType.FALSE): 7075 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7076 return self._parse_placeholder() 7077 7078 def _parse_star(self) -> t.Optional[exp.Expression]: 7079 if self._match(TokenType.STAR): 7080 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7081 return self._parse_placeholder() 7082 7083 def _parse_parameter(self) -> exp.Parameter: 7084 this = self._parse_identifier() or self._parse_primary_or_var() 7085 return self.expression(exp.Parameter, this=this) 7086 7087 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7088 if self._match_set(self.PLACEHOLDER_PARSERS): 7089 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7090 if placeholder: 7091 return placeholder 7092 self._advance(-1) 7093 return None 7094 7095 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7096 if not self._match_texts(keywords): 7097 return None 7098 if self._match(TokenType.L_PAREN, advance=False): 7099 return self._parse_wrapped_csv(self._parse_expression) 7100 7101 expression = 
self._parse_expression() 7102 return [expression] if expression else None 7103 7104 def _parse_csv( 7105 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7106 ) -> t.List[exp.Expression]: 7107 parse_result = parse_method() 7108 items = [parse_result] if parse_result is not None else [] 7109 7110 while self._match(sep): 7111 self._add_comments(parse_result) 7112 parse_result = parse_method() 7113 if parse_result is not None: 7114 items.append(parse_result) 7115 7116 return items 7117 7118 def _parse_tokens( 7119 self, parse_method: t.Callable, expressions: t.Dict 7120 ) -> t.Optional[exp.Expression]: 7121 this = parse_method() 7122 7123 while self._match_set(expressions): 7124 this = self.expression( 7125 expressions[self._prev.token_type], 7126 this=this, 7127 comments=self._prev_comments, 7128 expression=parse_method(), 7129 ) 7130 7131 return this 7132 7133 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7134 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7135 7136 def _parse_wrapped_csv( 7137 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7138 ) -> t.List[exp.Expression]: 7139 return self._parse_wrapped( 7140 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7141 ) 7142 7143 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7144 wrapped = self._match(TokenType.L_PAREN) 7145 if not wrapped and not optional: 7146 self.raise_error("Expecting (") 7147 parse_result = parse_method() 7148 if wrapped: 7149 self._match_r_paren() 7150 return parse_result 7151 7152 def _parse_expressions(self) -> t.List[exp.Expression]: 7153 return self._parse_csv(self._parse_expression) 7154 7155 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7156 return self._parse_select() or self._parse_set_operations( 7157 self._parse_alias(self._parse_assignment(), explicit=True) 7158 if alias 
def _parse_transaction(self) -> exp.Transaction | exp.Command:
    """Parse the remainder of a transaction-start statement into an `exp.Transaction`.

    Accepts an optional kind from `TRANSACTION_KIND`, an optional `TRANSACTION` / `WORK`
    keyword, and then a comma-separated list of modes. Each mode is a run of consecutive
    VAR tokens whose texts are joined with single spaces.
    """
    kind = self._prev.text if self._match_texts(self.TRANSACTION_KIND) else None

    # The keyword itself is optional, so the result of the match is ignored
    self._match_texts(("TRANSACTION", "WORK"))

    modes = []
    more = True
    while more:
        words = []
        while self._match(TokenType.VAR):
            words.append(self._prev.text)

        if words:
            modes.append(" ".join(words))

        more = self._match(TokenType.COMMA)

    return self.expression(exp.Transaction, this=kind, modes=modes)
def _parse_alter_table_add(self) -> t.List[exp.Expression]:
    """Parse the action list of an `ALTER TABLE ... ADD ...` statement.

    Tries, in order: a comma-separated list of constraint additions, a (possibly
    parenthesized) list of field definitions after a single `ADD`, an `ADD COLUMNS` schema,
    and finally a list of individual add-column clauses.

    Returns:
        The parsed action expressions; empty when `ADD COLUMNS` is not followed by a schema.
    """
    # One token before the current position; the parser is rewound here below so that
    # the `ADD` keyword can be matched explicitly.
    rewind_to = self._index - 1

    def parse_add_constraint() -> exp.Expression:
        constraints = self._parse_csv(self._parse_constraint)
        return self.expression(exp.AddConstraint, expressions=constraints)

    if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
        return self._parse_csv(parse_add_constraint)

    self._retreat(rewind_to)

    if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
        return self._parse_wrapped_csv(self._parse_field_def, optional=True)

    if not self._match_text_seq("ADD", "COLUMNS"):
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    schema = self._parse_schema()
    return [schema] if schema else []
def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
    """Parse the argument of an ALTER ... DISTSTYLE action.

    Returns:
        An `exp.AlterDistStyle` whose `this` is either an upper-cased `ALL` / `EVEN` / `AUTO`
        variable or the column parsed after the optional `KEY DISTKEY` keywords.
    """
    if not self._match_texts(("ALL", "EVEN", "AUTO")):
        # Key-based distribution: the keywords are optional, the column is parsed regardless
        self._match_text_seq("KEY", "DISTKEY")
        style = self._parse_column()
    else:
        style = exp.var(self._prev.text.upper())

    return self.expression(exp.AlterDistStyle, this=style)
def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
    """Parse an ALTER TABLE rename action.

    Returns:
        An `exp.RenameColumn` for `COLUMN [exists] <old> TO <new>`, or None when any of the
        old column, the `TO` keyword or the new column is missing. Without the `COLUMN`
        token, an `exp.AlterRename` wrapping the table parsed after an optional `TO`.
    """
    if not self._match(TokenType.COLUMN):
        self._match_text_seq("TO")
        renamed_table = self._parse_table(schema=True)
        return self.expression(exp.AlterRename, this=renamed_table)

    exists = self._parse_exists()
    source = self._parse_column()
    saw_to = self._match_text_seq("TO")
    destination = self._parse_column()

    if any(part is None for part in (source, saw_to, destination)):
        return None

    return self.expression(exp.RenameColumn, this=source, to=destination, exists=exists)
def _parse_alter(self) -> exp.Alter | exp.Command:
    """Parse an ALTER statement, falling back to a generic command when unsupported.

    Returns:
        An `exp.Alter` when the altered object kind is in `ALTERABLES`, an action parser is
        registered in `ALTER_PARSERS`, that parser yields at least one action and no tokens
        are left over. In every other case, the result of `_parse_as_command` for the token
        that preceded this call.
    """
    first_token = self._prev

    kind_token = self._prev if self._match_set(self.ALTERABLES) else None
    if not kind_token:
        return self._parse_as_command(first_token)

    exists = self._parse_exists()
    only = self._match_text_seq("ONLY")
    target = self._parse_table(schema=True)

    cluster = None
    if self._match(TokenType.ON):
        cluster = self._parse_on_property()

    if self._next:
        self._advance()

    # The action parser is looked up by the upper-cased text of the previous token
    action_parser = None
    if self._prev:
        action_parser = self.ALTER_PARSERS.get(self._prev.text.upper())

    if not action_parser:
        return self._parse_as_command(first_token)

    actions = ensure_list(action_parser(self))
    not_valid = self._match_text_seq("NOT", "VALID")
    options = self._parse_csv(self._parse_property)

    # Leftover tokens or an empty action list mean the statement was not fully understood
    if self._curr or not actions:
        return self._parse_as_command(first_token)

    return self.expression(
        exp.Alter,
        this=target,
        kind=kind_token.text.upper(),
        exists=exists,
        actions=actions,
        only=only,
        options=options,
        cluster=cluster,
        not_valid=not_valid,
    )
# https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
    """Parse the `[DELTA] STATISTICS ...` part of an ANALYZE statement.

    The upper-cased text of the previously consumed token becomes `kind`. After the
    mandatory `STATISTICS` keyword one of the following may appear:

    - `NOSCAN`
    - `FOR ALL COLUMNS`
    - `FOR COLUMNS <column>, ...`
    - `SAMPLE <number> [PERCENT]`

    Returns:
        An `exp.AnalyzeStatistics` node; `expressions` holds the column references or a
        single `exp.AnalyzeSample`, and is empty otherwise.

    Raises:
        Whatever `raise_error` raises when the `STATISTICS` keyword is missing.
    """
    this = None
    kind = self._prev.text.upper()
    option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
    expressions = []

    if not self._match_text_seq("STATISTICS"):
        self.raise_error("Expecting token STATISTICS")

    if self._match_text_seq("NOSCAN"):
        this = "NOSCAN"
    elif self._match(TokenType.FOR):
        if self._match_text_seq("ALL", "COLUMNS"):
            this = "FOR ALL COLUMNS"
        # Pass a tuple like every other `_match_texts` call site. With a bare string, a
        # membership test against the keywords would presumably degrade to substring
        # matching, accepting tokens such as COLUMN or COL.
        if self._match_texts(("COLUMNS",)):
            this = "FOR COLUMNS"
            expressions = self._parse_csv(self._parse_column_reference)
    elif self._match_text_seq("SAMPLE"):
        sample = self._parse_number()
        expressions = [
            self.expression(
                exp.AnalyzeSample,
                sample=sample,
                kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
            )
        ]

    return self.expression(
        exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
    )
# https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
    """Parse the `HISTOGRAM ON ...` part of an ANALYZE statement.

    The upper-cased text of the previously consumed token becomes `this`. After
    `HISTOGRAM ON <column>, ...` any number of `WITH ...` clauses may follow, then either
    `MANUAL | AUTO UPDATE` or `USING DATA <string>`.

    Returns:
        An `exp.AnalyzeHistogram` node. `expression` is the `exp.AnalyzeWith` built from the
        WITH clauses, replaced by an `exp.UsingData` when `USING DATA` is present.
    """
    action = self._prev.text.upper()
    columns = []
    detail: t.Optional[exp.Expression] = None
    update_mode = None

    if self._match_text_seq("HISTOGRAM", "ON"):
        columns = self._parse_csv(self._parse_column_reference)

        with_clauses = []
        while self._match(TokenType.WITH):
            # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
            if not self._match_texts(("SYNC", "ASYNC")):
                bucket_count = self._parse_number()
                if self._match_text_seq("BUCKETS"):
                    with_clauses.append(f"{bucket_count} BUCKETS")
            elif self._match_text_seq("MODE", advance=False):
                # MODE is matched without advancing, so `_prev` is expected to still be the
                # SYNC / ASYNC token; MODE itself is consumed right after.
                with_clauses.append(f"{self._prev.text.upper()} MODE")
                self._advance()

        if with_clauses:
            detail = self.expression(exp.AnalyzeWith, expressions=with_clauses)

        if self._match_texts(("MANUAL", "AUTO")) and self._match(
            TokenType.UPDATE, advance=False
        ):
            update_mode = self._prev.text.upper()
            self._advance()
        elif self._match_text_seq("USING", "DATA"):
            detail = self.expression(exp.UsingData, this=self._parse_string())

    return self.expression(
        exp.AnalyzeHistogram,
        this=action,
        expressions=columns,
        expression=detail,
        update_options=update_mode,
    )
returning=self._parse_returning(), 7621 ) 7622 7623 def _parse_when_matched(self) -> exp.Whens: 7624 whens = [] 7625 7626 while self._match(TokenType.WHEN): 7627 matched = not self._match(TokenType.NOT) 7628 self._match_text_seq("MATCHED") 7629 source = ( 7630 False 7631 if self._match_text_seq("BY", "TARGET") 7632 else self._match_text_seq("BY", "SOURCE") 7633 ) 7634 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7635 7636 self._match(TokenType.THEN) 7637 7638 if self._match(TokenType.INSERT): 7639 this = self._parse_star() 7640 if this: 7641 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7642 else: 7643 then = self.expression( 7644 exp.Insert, 7645 this=exp.var("ROW") 7646 if self._match_text_seq("ROW") 7647 else self._parse_value(values=False), 7648 expression=self._match_text_seq("VALUES") and self._parse_value(), 7649 ) 7650 elif self._match(TokenType.UPDATE): 7651 expressions = self._parse_star() 7652 if expressions: 7653 then = self.expression(exp.Update, expressions=expressions) 7654 else: 7655 then = self.expression( 7656 exp.Update, 7657 expressions=self._match(TokenType.SET) 7658 and self._parse_csv(self._parse_equality), 7659 ) 7660 elif self._match(TokenType.DELETE): 7661 then = self.expression(exp.Var, this=self._prev.text) 7662 else: 7663 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7664 7665 whens.append( 7666 self.expression( 7667 exp.When, 7668 matched=matched, 7669 source=source, 7670 condition=condition, 7671 then=then, 7672 ) 7673 ) 7674 return self.expression(exp.Whens, expressions=whens) 7675 7676 def _parse_show(self) -> t.Optional[exp.Expression]: 7677 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7678 if parser: 7679 return parser(self) 7680 return self._parse_as_command(self._prev) 7681 7682 def _parse_set_item_assignment( 7683 self, kind: t.Optional[str] = None 7684 ) -> t.Optional[exp.Expression]: 7685 index = self._index 7686 7687 if kind in 
("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7688 return self._parse_set_transaction(global_=kind == "GLOBAL") 7689 7690 left = self._parse_primary() or self._parse_column() 7691 assignment_delimiter = self._match_texts(("=", "TO")) 7692 7693 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7694 self._retreat(index) 7695 return None 7696 7697 right = self._parse_statement() or self._parse_id_var() 7698 if isinstance(right, (exp.Column, exp.Identifier)): 7699 right = exp.var(right.name) 7700 7701 this = self.expression(exp.EQ, this=left, expression=right) 7702 return self.expression(exp.SetItem, this=this, kind=kind) 7703 7704 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7705 self._match_text_seq("TRANSACTION") 7706 characteristics = self._parse_csv( 7707 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7708 ) 7709 return self.expression( 7710 exp.SetItem, 7711 expressions=characteristics, 7712 kind="TRANSACTION", 7713 **{"global": global_}, # type: ignore 7714 ) 7715 7716 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7717 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7718 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7719 7720 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7721 index = self._index 7722 set_ = self.expression( 7723 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7724 ) 7725 7726 if self._curr: 7727 self._retreat(index) 7728 return self._parse_as_command(self._prev) 7729 7730 return set_ 7731 7732 def _parse_var_from_options( 7733 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7734 ) -> t.Optional[exp.Var]: 7735 start = self._curr 7736 if not start: 7737 return None 7738 7739 option = start.text.upper() 7740 continuations = options.get(option) 7741 7742 index = self._index 7743 self._advance() 7744 for 
keywords in continuations or []: 7745 if isinstance(keywords, str): 7746 keywords = (keywords,) 7747 7748 if self._match_text_seq(*keywords): 7749 option = f"{option} {' '.join(keywords)}" 7750 break 7751 else: 7752 if continuations or continuations is None: 7753 if raise_unmatched: 7754 self.raise_error(f"Unknown option {option}") 7755 7756 self._retreat(index) 7757 return None 7758 7759 return exp.var(option) 7760 7761 def _parse_as_command(self, start: Token) -> exp.Command: 7762 while self._curr: 7763 self._advance() 7764 text = self._find_sql(start, self._prev) 7765 size = len(start.text) 7766 self._warn_unsupported() 7767 return exp.Command(this=text[:size], expression=text[size:]) 7768 7769 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7770 settings = [] 7771 7772 self._match_l_paren() 7773 kind = self._parse_id_var() 7774 7775 if self._match(TokenType.L_PAREN): 7776 while True: 7777 key = self._parse_id_var() 7778 value = self._parse_primary() 7779 if not key and value is None: 7780 break 7781 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7782 self._match(TokenType.R_PAREN) 7783 7784 self._match_r_paren() 7785 7786 return self.expression( 7787 exp.DictProperty, 7788 this=this, 7789 kind=kind.this if kind else None, 7790 settings=settings, 7791 ) 7792 7793 def _parse_dict_range(self, this: str) -> exp.DictRange: 7794 self._match_l_paren() 7795 has_min = self._match_text_seq("MIN") 7796 if has_min: 7797 min = self._parse_var() or self._parse_primary() 7798 self._match_text_seq("MAX") 7799 max = self._parse_var() or self._parse_primary() 7800 else: 7801 max = self._parse_var() or self._parse_primary() 7802 min = exp.Literal.number(0) 7803 self._match_r_paren() 7804 return self.expression(exp.DictRange, this=this, min=min, max=max) 7805 7806 def _parse_comprehension( 7807 self, this: t.Optional[exp.Expression] 7808 ) -> t.Optional[exp.Comprehension]: 7809 index = self._index 7810 expression = self._parse_column() 
7811 if not self._match(TokenType.IN): 7812 self._retreat(index - 1) 7813 return None 7814 iterator = self._parse_column() 7815 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7816 return self.expression( 7817 exp.Comprehension, 7818 this=this, 7819 expression=expression, 7820 iterator=iterator, 7821 condition=condition, 7822 ) 7823 7824 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7825 if self._match(TokenType.HEREDOC_STRING): 7826 return self.expression(exp.Heredoc, this=self._prev.text) 7827 7828 if not self._match_text_seq("$"): 7829 return None 7830 7831 tags = ["$"] 7832 tag_text = None 7833 7834 if self._is_connected(): 7835 self._advance() 7836 tags.append(self._prev.text.upper()) 7837 else: 7838 self.raise_error("No closing $ found") 7839 7840 if tags[-1] != "$": 7841 if self._is_connected() and self._match_text_seq("$"): 7842 tag_text = tags[-1] 7843 tags.append("$") 7844 else: 7845 self.raise_error("No closing $ found") 7846 7847 heredoc_start = self._curr 7848 7849 while self._curr: 7850 if self._match_text_seq(*tags, advance=False): 7851 this = self._find_sql(heredoc_start, self._prev) 7852 self._advance(len(tags)) 7853 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7854 7855 self._advance() 7856 7857 self.raise_error(f"No closing {''.join(tags)} found") 7858 return None 7859 7860 def _find_parser( 7861 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7862 ) -> t.Optional[t.Callable]: 7863 if not self._curr: 7864 return None 7865 7866 index = self._index 7867 this = [] 7868 while True: 7869 # The current token might be multiple words 7870 curr = self._curr.text.upper() 7871 key = curr.split(" ") 7872 this.append(curr) 7873 7874 self._advance() 7875 result, trie = in_trie(trie, key) 7876 if result == TrieResult.FAILED: 7877 break 7878 7879 if result == TrieResult.EXISTS: 7880 subparser = parsers[" ".join(this)] 7881 return subparser 7882 7883 self._retreat(index) 7884 return None 7885 7886 def 
_match(self, token_type, advance=True, expression=None): 7887 if not self._curr: 7888 return None 7889 7890 if self._curr.token_type == token_type: 7891 if advance: 7892 self._advance() 7893 self._add_comments(expression) 7894 return True 7895 7896 return None 7897 7898 def _match_set(self, types, advance=True): 7899 if not self._curr: 7900 return None 7901 7902 if self._curr.token_type in types: 7903 if advance: 7904 self._advance() 7905 return True 7906 7907 return None 7908 7909 def _match_pair(self, token_type_a, token_type_b, advance=True): 7910 if not self._curr or not self._next: 7911 return None 7912 7913 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7914 if advance: 7915 self._advance(2) 7916 return True 7917 7918 return None 7919 7920 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7921 if not self._match(TokenType.L_PAREN, expression=expression): 7922 self.raise_error("Expecting (") 7923 7924 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7925 if not self._match(TokenType.R_PAREN, expression=expression): 7926 self.raise_error("Expecting )") 7927 7928 def _match_texts(self, texts, advance=True): 7929 if ( 7930 self._curr 7931 and self._curr.token_type != TokenType.STRING 7932 and self._curr.text.upper() in texts 7933 ): 7934 if advance: 7935 self._advance() 7936 return True 7937 return None 7938 7939 def _match_text_seq(self, *texts, advance=True): 7940 index = self._index 7941 for text in texts: 7942 if ( 7943 self._curr 7944 and self._curr.token_type != TokenType.STRING 7945 and self._curr.text.upper() == text 7946 ): 7947 self._advance() 7948 else: 7949 self._retreat(index) 7950 return None 7951 7952 if not advance: 7953 self._retreat(index) 7954 7955 return True 7956 7957 def _replace_lambda( 7958 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7959 ) -> t.Optional[exp.Expression]: 7960 if not node: 7961 return node 7962 
7963 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7964 7965 for column in node.find_all(exp.Column): 7966 typ = lambda_types.get(column.parts[0].name) 7967 if typ is not None: 7968 dot_or_id = column.to_dot() if column.table else column.this 7969 7970 if typ: 7971 dot_or_id = self.expression( 7972 exp.Cast, 7973 this=dot_or_id, 7974 to=typ, 7975 ) 7976 7977 parent = column.parent 7978 7979 while isinstance(parent, exp.Dot): 7980 if not isinstance(parent.parent, exp.Dot): 7981 parent.replace(dot_or_id) 7982 break 7983 parent = parent.parent 7984 else: 7985 if column is node: 7986 node = dot_or_id 7987 else: 7988 column.replace(dot_or_id) 7989 return node 7990 7991 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7992 start = self._prev 7993 7994 # Not to be confused with TRUNCATE(number, decimals) function call 7995 if self._match(TokenType.L_PAREN): 7996 self._retreat(self._index - 2) 7997 return self._parse_function() 7998 7999 # Clickhouse supports TRUNCATE DATABASE as well 8000 is_database = self._match(TokenType.DATABASE) 8001 8002 self._match(TokenType.TABLE) 8003 8004 exists = self._parse_exists(not_=False) 8005 8006 expressions = self._parse_csv( 8007 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8008 ) 8009 8010 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8011 8012 if self._match_text_seq("RESTART", "IDENTITY"): 8013 identity = "RESTART" 8014 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8015 identity = "CONTINUE" 8016 else: 8017 identity = None 8018 8019 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8020 option = self._prev.text 8021 else: 8022 option = None 8023 8024 partition = self._parse_partition() 8025 8026 # Fallback case 8027 if self._curr: 8028 return self._parse_as_command(start) 8029 8030 return self.expression( 8031 exp.TruncateTable, 8032 expressions=expressions, 8033 is_database=is_database, 8034 
exists=exists, 8035 cluster=cluster, 8036 identity=identity, 8037 option=option, 8038 partition=partition, 8039 ) 8040 8041 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8042 this = self._parse_ordered(self._parse_opclass) 8043 8044 if not self._match(TokenType.WITH): 8045 return this 8046 8047 op = self._parse_var(any_token=True) 8048 8049 return self.expression(exp.WithOperator, this=this, op=op) 8050 8051 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8052 self._match(TokenType.EQ) 8053 self._match(TokenType.L_PAREN) 8054 8055 opts: t.List[t.Optional[exp.Expression]] = [] 8056 option: exp.Expression | None 8057 while self._curr and not self._match(TokenType.R_PAREN): 8058 if self._match_text_seq("FORMAT_NAME", "="): 8059 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8060 option = self._parse_format_name() 8061 else: 8062 option = self._parse_property() 8063 8064 if option is None: 8065 self.raise_error("Unable to parse option") 8066 break 8067 8068 opts.append(option) 8069 8070 return opts 8071 8072 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8073 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8074 8075 options = [] 8076 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8077 option = self._parse_var(any_token=True) 8078 prev = self._prev.text.upper() 8079 8080 # Different dialects might separate options and values by white space, "=" and "AS" 8081 self._match(TokenType.EQ) 8082 self._match(TokenType.ALIAS) 8083 8084 param = self.expression(exp.CopyParameter, this=option) 8085 8086 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8087 TokenType.L_PAREN, advance=False 8088 ): 8089 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8090 param.set("expressions", self._parse_wrapped_options()) 8091 elif prev == "FILE_FORMAT": 8092 # T-SQL's external file format case 8093 param.set("expression", self._parse_field()) 8094 
else: 8095 param.set("expression", self._parse_unquoted_field()) 8096 8097 options.append(param) 8098 self._match(sep) 8099 8100 return options 8101 8102 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8103 expr = self.expression(exp.Credentials) 8104 8105 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8106 expr.set("storage", self._parse_field()) 8107 if self._match_text_seq("CREDENTIALS"): 8108 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8109 creds = ( 8110 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8111 ) 8112 expr.set("credentials", creds) 8113 if self._match_text_seq("ENCRYPTION"): 8114 expr.set("encryption", self._parse_wrapped_options()) 8115 if self._match_text_seq("IAM_ROLE"): 8116 expr.set("iam_role", self._parse_field()) 8117 if self._match_text_seq("REGION"): 8118 expr.set("region", self._parse_field()) 8119 8120 return expr 8121 8122 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8123 return self._parse_field() 8124 8125 def _parse_copy(self) -> exp.Copy | exp.Command: 8126 start = self._prev 8127 8128 self._match(TokenType.INTO) 8129 8130 this = ( 8131 self._parse_select(nested=True, parse_subquery_alias=False) 8132 if self._match(TokenType.L_PAREN, advance=False) 8133 else self._parse_table(schema=True) 8134 ) 8135 8136 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8137 8138 files = self._parse_csv(self._parse_file_location) 8139 credentials = self._parse_credentials() 8140 8141 self._match_text_seq("WITH") 8142 8143 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8144 8145 # Fallback case 8146 if self._curr: 8147 return self._parse_as_command(start) 8148 8149 return self.expression( 8150 exp.Copy, 8151 this=this, 8152 kind=kind, 8153 credentials=credentials, 8154 files=files, 8155 params=params, 8156 ) 8157 8158 def _parse_normalize(self) -> exp.Normalize: 8159 return self.expression( 8160 
exp.Normalize, 8161 this=self._parse_bitwise(), 8162 form=self._match(TokenType.COMMA) and self._parse_var(), 8163 ) 8164 8165 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8166 args = self._parse_csv(lambda: self._parse_lambda()) 8167 8168 this = seq_get(args, 0) 8169 decimals = seq_get(args, 1) 8170 8171 return expr_type( 8172 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8173 ) 8174 8175 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8176 if self._match_text_seq("COLUMNS", "(", advance=False): 8177 this = self._parse_function() 8178 if isinstance(this, exp.Columns): 8179 this.set("unpack", True) 8180 return this 8181 8182 return self.expression( 8183 exp.Star, 8184 **{ # type: ignore 8185 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8186 "replace": self._parse_star_op("REPLACE"), 8187 "rename": self._parse_star_op("RENAME"), 8188 }, 8189 ) 8190 8191 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8192 privilege_parts = [] 8193 8194 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8195 # (end of privilege list) or L_PAREN (start of column list) are met 8196 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8197 privilege_parts.append(self._curr.text.upper()) 8198 self._advance() 8199 8200 this = exp.var(" ".join(privilege_parts)) 8201 expressions = ( 8202 self._parse_wrapped_csv(self._parse_column) 8203 if self._match(TokenType.L_PAREN, advance=False) 8204 else None 8205 ) 8206 8207 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8208 8209 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8210 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8211 principal = self._parse_id_var() 8212 8213 if not principal: 8214 return None 8215 8216 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8217 8218 def 
_parse_grant(self) -> exp.Grant | exp.Command: 8219 start = self._prev 8220 8221 privileges = self._parse_csv(self._parse_grant_privilege) 8222 8223 self._match(TokenType.ON) 8224 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8225 8226 # Attempt to parse the securable e.g. MySQL allows names 8227 # such as "foo.*", "*.*" which are not easily parseable yet 8228 securable = self._try_parse(self._parse_table_parts) 8229 8230 if not securable or not self._match_text_seq("TO"): 8231 return self._parse_as_command(start) 8232 8233 principals = self._parse_csv(self._parse_grant_principal) 8234 8235 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8236 8237 if self._curr: 8238 return self._parse_as_command(start) 8239 8240 return self.expression( 8241 exp.Grant, 8242 privileges=privileges, 8243 kind=kind, 8244 securable=securable, 8245 principals=principals, 8246 grant_option=grant_option, 8247 ) 8248 8249 def _parse_overlay(self) -> exp.Overlay: 8250 return self.expression( 8251 exp.Overlay, 8252 **{ # type: ignore 8253 "this": self._parse_bitwise(), 8254 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8255 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8256 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8257 }, 8258 ) 8259 8260 def _parse_format_name(self) -> exp.Property: 8261 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8262 # for FILE_FORMAT = <format_name> 8263 return self.expression( 8264 exp.Property, 8265 this=exp.var("FORMAT_NAME"), 8266 value=self._parse_string() or self._parse_table_parts(), 8267 ) 8268 8269 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8270 args: t.List[exp.Expression] = [] 8271 8272 if self._match(TokenType.DISTINCT): 8273 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8274 self._match(TokenType.COMMA) 8275 8276 
args.extend(self._parse_csv(self._parse_assignment)) 8277 8278 return self.expression( 8279 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8280 ) 8281 8282 def _identifier_expression( 8283 self, token: t.Optional[Token] = None, **kwargs: t.Any 8284 ) -> exp.Identifier: 8285 token = token or self._prev 8286 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8287 expression.update_positions(token) 8288 return expression
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """
    Build a map expression from a flat argument list.

    Args:
        args: Either a single star expression, or keys and values interleaved
            as `[key0, value0, key1, value1, ...]`.

    Returns:
        An `exp.StarMap` wrapping the star when it is the only argument, otherwise
        an `exp.VarMap` whose `keys` and `values` are arrays built from the
        even-indexed and odd-indexed arguments respectively.

    Raises:
        IndexError: If a key at an even index has no value following it.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Keys sit at even indices; each one's value is the element right after it.
    key_positions = range(0, len(args), 2)
    keys = [args[pos] for pos in key_positions]
    values = [args[pos + 1] for pos in key_positions]

    return exp.VarMap(
        keys=exp.array(*keys, copy=False),
        values=exp.array(*values, copy=False),
    )
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """
    Create a parser callback for a binary range operator.

    Args:
        expr_type: The expression class to instantiate for the operator.
        reverse_args: When true, the already-parsed operand becomes `expression`
            and the newly parsed operand becomes `this`.

    Returns:
        A function taking the parser and the already-parsed operand. It parses the
        other operand with `_parse_bitwise`, builds `expr_type` from both, and passes
        the result through `_parse_escape`.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        parsed = self._parse_bitwise()
        left, right = (parsed, this) if reverse_args else (this, parsed)
        node = self.expression(expr_type, this=left, expression=right)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """
    Build a logarithm expression from the arguments of a `LOG(...)` call.

    Args:
        args: The parsed call arguments (one or two are read).
        dialect: Supplies `LOG_BASE_FIRST` and `parser_class.LOG_DEFAULTS_TO_LN`.

    Returns:
        With two arguments, an `exp.Log` whose operands are swapped when the dialect
        does not put the base first. With a single argument, an `exp.Ln` if the
        dialect's parser sets `LOG_DEFAULTS_TO_LN`, otherwise an `exp.Log`.
    """
    # Default argument order is base, expression
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        single_arg_type = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return single_arg_type(this=first)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)

    return exp.Log(this=second, expression=first)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """
    Create a builder for JSON extraction expressions that take a path argument.

    Args:
        expr_type: The expression class the returned builder instantiates.

    Returns:
        A builder taking `(args, dialect)`. It uses the first argument as `this` and
        the second, converted through `dialect.to_json_path`, as `expression`. Any
        further arguments are stored under `expressions`, but only when `expr_type`
        is `exp.JSONExtract`.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        target = seq_get(args, 0)
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=target, expression=json_path)

        extra_paths = args[2:]
        if len(args) > 2 and expr_type is exp.JSONExtract:
            node.set("expressions", extra_paths)

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """
    Build an `exp.Mod` from the two arguments of a `MOD(...)` call.

    Args:
        args: The parsed call arguments; the first two are used.

    Returns:
        An `exp.Mod` whose operands are wrapped in `exp.Paren` when they are
        `exp.Binary` nodes.
    """

    def _parenthesize(operand):
        # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
        if isinstance(operand, exp.Binary):
            return exp.Paren(this=operand)
        return operand

    dividend = _parenthesize(seq_get(args, 0))
    divisor = _parenthesize(seq_get(args, 1))

    return exp.Mod(this=dividend, expression=divisor)
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """
    Build an array-like expression from the arguments of its constructor.

    Args:
        exp_class: The expression class to instantiate with `expressions=args`.
        args: The parsed constructor arguments.
        bracket_kind: The token type that opened the constructor.
        dialect: Supplies `HAS_DISTINCT_ARRAY_CONSTRUCTORS`.

    Returns:
        The new expression. For `exp.Array` in a dialect with distinct array
        constructors, `bracket_notation` is also set to whether `bracket_kind`
        is `TokenType.L_BRACKET`.
    """
    node = exp_class(expressions=args)

    # The dialect flag is only consulted for exp.Array (short-circuit).
    tracks_bracket_notation = (
        exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS
    )
    if tracks_bracket_notation:
        used_brackets = bracket_kind == TokenType.L_BRACKET
        node.set("bracket_notation", used_brackets)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """
    Build an expression from the arguments of a `CONVERT_TIMEZONE(...)` call.

    Args:
        args: The parsed call arguments.
        default_source_tz: Source timezone to use when exactly two arguments are
            given; a falsy value leaves `source_tz` as None.

    Returns:
        With exactly two arguments, an `exp.ConvertTimezone` taking them as
        `target_tz` and `timestamp`. For any other argument count, the result of
        `exp.ConvertTimezone.from_arg_list(args)`.
    """
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    if default_source_tz:
        source_tz = exp.Literal.string(default_source_tz)
    else:
        source_tz = None

    return exp.ConvertTimezone(
        source_tz=source_tz,
        target_tz=seq_get(args, 0),
        timestamp=seq_get(args, 1),
    )
178class Parser(metaclass=_Parser): 179 """ 180 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 181 182 Args: 183 error_level: The desired error level. 184 Default: ErrorLevel.IMMEDIATE 185 error_message_context: The amount of context to capture from a query string when displaying 186 the error message (in number of characters). 187 Default: 100 188 max_errors: Maximum number of error messages to include in a raised ParseError. 189 This is only relevant if error_level is ErrorLevel.RAISE. 190 Default: 3 191 """ 192 193 FUNCTIONS: t.Dict[str, t.Callable] = { 194 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 195 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 196 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 197 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "CHAR": lambda args: exp.Chr(expressions=args), 204 "CHR": lambda args: exp.Chr(expressions=args), 205 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 206 "CONCAT": lambda args, dialect: exp.Concat( 207 expressions=args, 208 safe=not dialect.STRICT_STRING_CONCAT, 209 coalesce=dialect.CONCAT_COALESCE, 210 ), 211 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 212 expressions=args, 213 safe=not dialect.STRICT_STRING_CONCAT, 214 coalesce=dialect.CONCAT_COALESCE, 215 ), 216 "CONVERT_TIMEZONE": build_convert_timezone, 217 "DATE_TO_DATE_STR": lambda args: exp.Cast( 218 this=seq_get(args, 0), 219 to=exp.DataType(this=exp.DataType.Type.TEXT), 220 ), 221 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 222 start=seq_get(args, 0), 223 end=seq_get(args, 1), 224 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 225 ), 226 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 227 "HEX": build_hex, 228 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 229 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 230 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 231 "LIKE": build_like, 232 "LOG": build_logarithm, 233 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 234 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 235 "LOWER": build_lower, 236 "LPAD": lambda args: build_pad(args), 237 "LEFTPAD": lambda args: build_pad(args), 238 "LTRIM": lambda args: build_trim(args), 239 "MOD": build_mod, 240 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 241 "RPAD": lambda args: build_pad(args, is_left=False), 242 "RTRIM": lambda args: build_trim(args, is_left=False), 243 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 244 if len(args) != 2 245 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 246 "STRPOS": exp.StrPosition.from_arg_list, 247 "CHARINDEX": lambda args: build_locate_strposition(args), 248 "INSTR": exp.StrPosition.from_arg_list, 249 "LOCATE": lambda args: build_locate_strposition(args), 250 "TIME_TO_TIME_STR": lambda args: exp.Cast( 251 this=seq_get(args, 0), 252 to=exp.DataType(this=exp.DataType.Type.TEXT), 253 ), 254 "TO_HEX": build_hex, 255 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 256 this=exp.Cast( 257 this=seq_get(args, 0), 258 to=exp.DataType(this=exp.DataType.Type.TEXT), 259 ), 260 start=exp.Literal.number(1), 261 length=exp.Literal.number(10), 262 ), 263 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 264 "UPPER": build_upper, 265 "VAR_MAP": build_var_map, 266 } 267 268 NO_PAREN_FUNCTIONS = { 269 TokenType.CURRENT_DATE: 
exp.CurrentDate, 270 TokenType.CURRENT_DATETIME: exp.CurrentDate, 271 TokenType.CURRENT_TIME: exp.CurrentTime, 272 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 273 TokenType.CURRENT_USER: exp.CurrentUser, 274 } 275 276 STRUCT_TYPE_TOKENS = { 277 TokenType.NESTED, 278 TokenType.OBJECT, 279 TokenType.STRUCT, 280 TokenType.UNION, 281 } 282 283 NESTED_TYPE_TOKENS = { 284 TokenType.ARRAY, 285 TokenType.LIST, 286 TokenType.LOWCARDINALITY, 287 TokenType.MAP, 288 TokenType.NULLABLE, 289 TokenType.RANGE, 290 *STRUCT_TYPE_TOKENS, 291 } 292 293 ENUM_TYPE_TOKENS = { 294 TokenType.DYNAMIC, 295 TokenType.ENUM, 296 TokenType.ENUM8, 297 TokenType.ENUM16, 298 } 299 300 AGGREGATE_TYPE_TOKENS = { 301 TokenType.AGGREGATEFUNCTION, 302 TokenType.SIMPLEAGGREGATEFUNCTION, 303 } 304 305 TYPE_TOKENS = { 306 TokenType.BIT, 307 TokenType.BOOLEAN, 308 TokenType.TINYINT, 309 TokenType.UTINYINT, 310 TokenType.SMALLINT, 311 TokenType.USMALLINT, 312 TokenType.INT, 313 TokenType.UINT, 314 TokenType.BIGINT, 315 TokenType.UBIGINT, 316 TokenType.INT128, 317 TokenType.UINT128, 318 TokenType.INT256, 319 TokenType.UINT256, 320 TokenType.MEDIUMINT, 321 TokenType.UMEDIUMINT, 322 TokenType.FIXEDSTRING, 323 TokenType.FLOAT, 324 TokenType.DOUBLE, 325 TokenType.UDOUBLE, 326 TokenType.CHAR, 327 TokenType.NCHAR, 328 TokenType.VARCHAR, 329 TokenType.NVARCHAR, 330 TokenType.BPCHAR, 331 TokenType.TEXT, 332 TokenType.MEDIUMTEXT, 333 TokenType.LONGTEXT, 334 TokenType.BLOB, 335 TokenType.MEDIUMBLOB, 336 TokenType.LONGBLOB, 337 TokenType.BINARY, 338 TokenType.VARBINARY, 339 TokenType.JSON, 340 TokenType.JSONB, 341 TokenType.INTERVAL, 342 TokenType.TINYBLOB, 343 TokenType.TINYTEXT, 344 TokenType.TIME, 345 TokenType.TIMETZ, 346 TokenType.TIMESTAMP, 347 TokenType.TIMESTAMP_S, 348 TokenType.TIMESTAMP_MS, 349 TokenType.TIMESTAMP_NS, 350 TokenType.TIMESTAMPTZ, 351 TokenType.TIMESTAMPLTZ, 352 TokenType.TIMESTAMPNTZ, 353 TokenType.DATETIME, 354 TokenType.DATETIME2, 355 TokenType.DATETIME64, 356 TokenType.SMALLDATETIME, 
357 TokenType.DATE, 358 TokenType.DATE32, 359 TokenType.INT4RANGE, 360 TokenType.INT4MULTIRANGE, 361 TokenType.INT8RANGE, 362 TokenType.INT8MULTIRANGE, 363 TokenType.NUMRANGE, 364 TokenType.NUMMULTIRANGE, 365 TokenType.TSRANGE, 366 TokenType.TSMULTIRANGE, 367 TokenType.TSTZRANGE, 368 TokenType.TSTZMULTIRANGE, 369 TokenType.DATERANGE, 370 TokenType.DATEMULTIRANGE, 371 TokenType.DECIMAL, 372 TokenType.DECIMAL32, 373 TokenType.DECIMAL64, 374 TokenType.DECIMAL128, 375 TokenType.DECIMAL256, 376 TokenType.UDECIMAL, 377 TokenType.BIGDECIMAL, 378 TokenType.UUID, 379 TokenType.GEOGRAPHY, 380 TokenType.GEOMETRY, 381 TokenType.POINT, 382 TokenType.RING, 383 TokenType.LINESTRING, 384 TokenType.MULTILINESTRING, 385 TokenType.POLYGON, 386 TokenType.MULTIPOLYGON, 387 TokenType.HLLSKETCH, 388 TokenType.HSTORE, 389 TokenType.PSEUDO_TYPE, 390 TokenType.SUPER, 391 TokenType.SERIAL, 392 TokenType.SMALLSERIAL, 393 TokenType.BIGSERIAL, 394 TokenType.XML, 395 TokenType.YEAR, 396 TokenType.USERDEFINED, 397 TokenType.MONEY, 398 TokenType.SMALLMONEY, 399 TokenType.ROWVERSION, 400 TokenType.IMAGE, 401 TokenType.VARIANT, 402 TokenType.VECTOR, 403 TokenType.VOID, 404 TokenType.OBJECT, 405 TokenType.OBJECT_IDENTIFIER, 406 TokenType.INET, 407 TokenType.IPADDRESS, 408 TokenType.IPPREFIX, 409 TokenType.IPV4, 410 TokenType.IPV6, 411 TokenType.UNKNOWN, 412 TokenType.NOTHING, 413 TokenType.NULL, 414 TokenType.NAME, 415 TokenType.TDIGEST, 416 TokenType.DYNAMIC, 417 *ENUM_TYPE_TOKENS, 418 *NESTED_TYPE_TOKENS, 419 *AGGREGATE_TYPE_TOKENS, 420 } 421 422 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 423 TokenType.BIGINT: TokenType.UBIGINT, 424 TokenType.INT: TokenType.UINT, 425 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 426 TokenType.SMALLINT: TokenType.USMALLINT, 427 TokenType.TINYINT: TokenType.UTINYINT, 428 TokenType.DECIMAL: TokenType.UDECIMAL, 429 TokenType.DOUBLE: TokenType.UDOUBLE, 430 } 431 432 SUBQUERY_PREDICATES = { 433 TokenType.ANY: exp.Any, 434 TokenType.ALL: exp.All, 435 TokenType.EXISTS: exp.Exists, 436 
TokenType.SOME: exp.Any, 437 } 438 439 RESERVED_TOKENS = { 440 *Tokenizer.SINGLE_TOKENS.values(), 441 TokenType.SELECT, 442 } - {TokenType.IDENTIFIER} 443 444 DB_CREATABLES = { 445 TokenType.DATABASE, 446 TokenType.DICTIONARY, 447 TokenType.FILE_FORMAT, 448 TokenType.MODEL, 449 TokenType.NAMESPACE, 450 TokenType.SCHEMA, 451 TokenType.SEQUENCE, 452 TokenType.SINK, 453 TokenType.SOURCE, 454 TokenType.STAGE, 455 TokenType.STORAGE_INTEGRATION, 456 TokenType.STREAMLIT, 457 TokenType.TABLE, 458 TokenType.TAG, 459 TokenType.VIEW, 460 TokenType.WAREHOUSE, 461 } 462 463 CREATABLES = { 464 TokenType.COLUMN, 465 TokenType.CONSTRAINT, 466 TokenType.FOREIGN_KEY, 467 TokenType.FUNCTION, 468 TokenType.INDEX, 469 TokenType.PROCEDURE, 470 *DB_CREATABLES, 471 } 472 473 ALTERABLES = { 474 TokenType.INDEX, 475 TokenType.TABLE, 476 TokenType.VIEW, 477 } 478 479 # Tokens that can represent identifiers 480 ID_VAR_TOKENS = { 481 TokenType.ALL, 482 TokenType.ATTACH, 483 TokenType.VAR, 484 TokenType.ANTI, 485 TokenType.APPLY, 486 TokenType.ASC, 487 TokenType.ASOF, 488 TokenType.AUTO_INCREMENT, 489 TokenType.BEGIN, 490 TokenType.BPCHAR, 491 TokenType.CACHE, 492 TokenType.CASE, 493 TokenType.COLLATE, 494 TokenType.COMMAND, 495 TokenType.COMMENT, 496 TokenType.COMMIT, 497 TokenType.CONSTRAINT, 498 TokenType.COPY, 499 TokenType.CUBE, 500 TokenType.CURRENT_SCHEMA, 501 TokenType.DEFAULT, 502 TokenType.DELETE, 503 TokenType.DESC, 504 TokenType.DESCRIBE, 505 TokenType.DETACH, 506 TokenType.DICTIONARY, 507 TokenType.DIV, 508 TokenType.END, 509 TokenType.EXECUTE, 510 TokenType.EXPORT, 511 TokenType.ESCAPE, 512 TokenType.FALSE, 513 TokenType.FIRST, 514 TokenType.FILTER, 515 TokenType.FINAL, 516 TokenType.FORMAT, 517 TokenType.FULL, 518 TokenType.GET, 519 TokenType.IDENTIFIER, 520 TokenType.IS, 521 TokenType.ISNULL, 522 TokenType.INTERVAL, 523 TokenType.KEEP, 524 TokenType.KILL, 525 TokenType.LEFT, 526 TokenType.LIMIT, 527 TokenType.LOAD, 528 TokenType.MERGE, 529 TokenType.NATURAL, 530 TokenType.NEXT, 
531 TokenType.OFFSET, 532 TokenType.OPERATOR, 533 TokenType.ORDINALITY, 534 TokenType.OVERLAPS, 535 TokenType.OVERWRITE, 536 TokenType.PARTITION, 537 TokenType.PERCENT, 538 TokenType.PIVOT, 539 TokenType.PRAGMA, 540 TokenType.PUT, 541 TokenType.RANGE, 542 TokenType.RECURSIVE, 543 TokenType.REFERENCES, 544 TokenType.REFRESH, 545 TokenType.RENAME, 546 TokenType.REPLACE, 547 TokenType.RIGHT, 548 TokenType.ROLLUP, 549 TokenType.ROW, 550 TokenType.ROWS, 551 TokenType.SEMI, 552 TokenType.SET, 553 TokenType.SETTINGS, 554 TokenType.SHOW, 555 TokenType.TEMPORARY, 556 TokenType.TOP, 557 TokenType.TRUE, 558 TokenType.TRUNCATE, 559 TokenType.UNIQUE, 560 TokenType.UNNEST, 561 TokenType.UNPIVOT, 562 TokenType.UPDATE, 563 TokenType.USE, 564 TokenType.VOLATILE, 565 TokenType.WINDOW, 566 *CREATABLES, 567 *SUBQUERY_PREDICATES, 568 *TYPE_TOKENS, 569 *NO_PAREN_FUNCTIONS, 570 } 571 ID_VAR_TOKENS.remove(TokenType.UNION) 572 573 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 574 TokenType.ANTI, 575 TokenType.APPLY, 576 TokenType.ASOF, 577 TokenType.FULL, 578 TokenType.LEFT, 579 TokenType.LOCK, 580 TokenType.NATURAL, 581 TokenType.RIGHT, 582 TokenType.SEMI, 583 TokenType.WINDOW, 584 } 585 586 ALIAS_TOKENS = ID_VAR_TOKENS 587 588 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 589 590 ARRAY_CONSTRUCTORS = { 591 "ARRAY": exp.Array, 592 "LIST": exp.List, 593 } 594 595 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 596 597 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 598 599 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 600 601 FUNC_TOKENS = { 602 TokenType.COLLATE, 603 TokenType.COMMAND, 604 TokenType.CURRENT_DATE, 605 TokenType.CURRENT_DATETIME, 606 TokenType.CURRENT_SCHEMA, 607 TokenType.CURRENT_TIMESTAMP, 608 TokenType.CURRENT_TIME, 609 TokenType.CURRENT_USER, 610 TokenType.FILTER, 611 TokenType.FIRST, 612 TokenType.FORMAT, 613 TokenType.GET, 614 TokenType.GLOB, 615 TokenType.IDENTIFIER, 616 TokenType.INDEX, 617 TokenType.ISNULL, 618 TokenType.ILIKE, 619 TokenType.INSERT, 
620 TokenType.LIKE, 621 TokenType.MERGE, 622 TokenType.NEXT, 623 TokenType.OFFSET, 624 TokenType.PRIMARY_KEY, 625 TokenType.RANGE, 626 TokenType.REPLACE, 627 TokenType.RLIKE, 628 TokenType.ROW, 629 TokenType.UNNEST, 630 TokenType.VAR, 631 TokenType.LEFT, 632 TokenType.RIGHT, 633 TokenType.SEQUENCE, 634 TokenType.DATE, 635 TokenType.DATETIME, 636 TokenType.TABLE, 637 TokenType.TIMESTAMP, 638 TokenType.TIMESTAMPTZ, 639 TokenType.TRUNCATE, 640 TokenType.WINDOW, 641 TokenType.XOR, 642 *TYPE_TOKENS, 643 *SUBQUERY_PREDICATES, 644 } 645 646 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 647 TokenType.AND: exp.And, 648 } 649 650 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.COLON_EQ: exp.PropertyEQ, 652 } 653 654 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.OR: exp.Or, 656 } 657 658 EQUALITY = { 659 TokenType.EQ: exp.EQ, 660 TokenType.NEQ: exp.NEQ, 661 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 662 } 663 664 COMPARISON = { 665 TokenType.GT: exp.GT, 666 TokenType.GTE: exp.GTE, 667 TokenType.LT: exp.LT, 668 TokenType.LTE: exp.LTE, 669 } 670 671 BITWISE = { 672 TokenType.AMP: exp.BitwiseAnd, 673 TokenType.CARET: exp.BitwiseXor, 674 TokenType.PIPE: exp.BitwiseOr, 675 } 676 677 TERM = { 678 TokenType.DASH: exp.Sub, 679 TokenType.PLUS: exp.Add, 680 TokenType.MOD: exp.Mod, 681 TokenType.COLLATE: exp.Collate, 682 } 683 684 FACTOR = { 685 TokenType.DIV: exp.IntDiv, 686 TokenType.LR_ARROW: exp.Distance, 687 TokenType.SLASH: exp.Div, 688 TokenType.STAR: exp.Mul, 689 } 690 691 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 692 693 TIMES = { 694 TokenType.TIME, 695 TokenType.TIMETZ, 696 } 697 698 TIMESTAMPS = { 699 TokenType.TIMESTAMP, 700 TokenType.TIMESTAMPNTZ, 701 TokenType.TIMESTAMPTZ, 702 TokenType.TIMESTAMPLTZ, 703 *TIMES, 704 } 705 706 SET_OPERATIONS = { 707 TokenType.UNION, 708 TokenType.INTERSECT, 709 TokenType.EXCEPT, 710 } 711 712 JOIN_METHODS = { 713 TokenType.ASOF, 714 TokenType.NATURAL, 715 
TokenType.POSITIONAL, 716 } 717 718 JOIN_SIDES = { 719 TokenType.LEFT, 720 TokenType.RIGHT, 721 TokenType.FULL, 722 } 723 724 JOIN_KINDS = { 725 TokenType.ANTI, 726 TokenType.CROSS, 727 TokenType.INNER, 728 TokenType.OUTER, 729 TokenType.SEMI, 730 TokenType.STRAIGHT_JOIN, 731 } 732 733 JOIN_HINTS: t.Set[str] = set() 734 735 LAMBDAS = { 736 TokenType.ARROW: lambda self, expressions: self.expression( 737 exp.Lambda, 738 this=self._replace_lambda( 739 self._parse_assignment(), 740 expressions, 741 ), 742 expressions=expressions, 743 ), 744 TokenType.FARROW: lambda self, expressions: self.expression( 745 exp.Kwarg, 746 this=exp.var(expressions[0].name), 747 expression=self._parse_assignment(), 748 ), 749 } 750 751 COLUMN_OPERATORS = { 752 TokenType.DOT: None, 753 TokenType.DOTCOLON: lambda self, this, to: self.expression( 754 exp.JSONCast, 755 this=this, 756 to=to, 757 ), 758 TokenType.DCOLON: lambda self, this, to: self.expression( 759 exp.Cast if self.STRICT_CAST else exp.TryCast, 760 this=this, 761 to=to, 762 ), 763 TokenType.ARROW: lambda self, this, path: self.expression( 764 exp.JSONExtract, 765 this=this, 766 expression=self.dialect.to_json_path(path), 767 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 768 ), 769 TokenType.DARROW: lambda self, this, path: self.expression( 770 exp.JSONExtractScalar, 771 this=this, 772 expression=self.dialect.to_json_path(path), 773 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 774 ), 775 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 776 exp.JSONBExtract, 777 this=this, 778 expression=path, 779 ), 780 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 781 exp.JSONBExtractScalar, 782 this=this, 783 expression=path, 784 ), 785 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 786 exp.JSONBContains, 787 this=this, 788 expression=key, 789 ), 790 } 791 792 EXPRESSION_PARSERS = { 793 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 794 exp.Column: 
lambda self: self._parse_column(), 795 exp.Condition: lambda self: self._parse_assignment(), 796 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 797 exp.Expression: lambda self: self._parse_expression(), 798 exp.From: lambda self: self._parse_from(joins=True), 799 exp.Group: lambda self: self._parse_group(), 800 exp.Having: lambda self: self._parse_having(), 801 exp.Hint: lambda self: self._parse_hint_body(), 802 exp.Identifier: lambda self: self._parse_id_var(), 803 exp.Join: lambda self: self._parse_join(), 804 exp.Lambda: lambda self: self._parse_lambda(), 805 exp.Lateral: lambda self: self._parse_lateral(), 806 exp.Limit: lambda self: self._parse_limit(), 807 exp.Offset: lambda self: self._parse_offset(), 808 exp.Order: lambda self: self._parse_order(), 809 exp.Ordered: lambda self: self._parse_ordered(), 810 exp.Properties: lambda self: self._parse_properties(), 811 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 812 exp.Qualify: lambda self: self._parse_qualify(), 813 exp.Returning: lambda self: self._parse_returning(), 814 exp.Select: lambda self: self._parse_select(), 815 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 816 exp.Table: lambda self: self._parse_table_parts(), 817 exp.TableAlias: lambda self: self._parse_table_alias(), 818 exp.Tuple: lambda self: self._parse_value(values=False), 819 exp.Whens: lambda self: self._parse_when_matched(), 820 exp.Where: lambda self: self._parse_where(), 821 exp.Window: lambda self: self._parse_named_window(), 822 exp.With: lambda self: self._parse_with(), 823 "JOIN_TYPE": lambda self: self._parse_join_parts(), 824 } 825 826 STATEMENT_PARSERS = { 827 TokenType.ALTER: lambda self: self._parse_alter(), 828 TokenType.ANALYZE: lambda self: self._parse_analyze(), 829 TokenType.BEGIN: lambda self: self._parse_transaction(), 830 TokenType.CACHE: lambda self: self._parse_cache(), 831 TokenType.COMMENT: lambda self: self._parse_comment(), 832 
TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 833 TokenType.COPY: lambda self: self._parse_copy(), 834 TokenType.CREATE: lambda self: self._parse_create(), 835 TokenType.DELETE: lambda self: self._parse_delete(), 836 TokenType.DESC: lambda self: self._parse_describe(), 837 TokenType.DESCRIBE: lambda self: self._parse_describe(), 838 TokenType.DROP: lambda self: self._parse_drop(), 839 TokenType.GRANT: lambda self: self._parse_grant(), 840 TokenType.INSERT: lambda self: self._parse_insert(), 841 TokenType.KILL: lambda self: self._parse_kill(), 842 TokenType.LOAD: lambda self: self._parse_load(), 843 TokenType.MERGE: lambda self: self._parse_merge(), 844 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 845 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 846 TokenType.REFRESH: lambda self: self._parse_refresh(), 847 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 848 TokenType.SET: lambda self: self._parse_set(), 849 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 850 TokenType.UNCACHE: lambda self: self._parse_uncache(), 851 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 852 TokenType.UPDATE: lambda self: self._parse_update(), 853 TokenType.USE: lambda self: self._parse_use(), 854 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 855 } 856 857 UNARY_PARSERS = { 858 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 859 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 860 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 861 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 862 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 863 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 864 } 865 866 STRING_PARSERS = { 867 
TokenType.HEREDOC_STRING: lambda self, token: self.expression( 868 exp.RawString, this=token.text 869 ), 870 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 871 exp.National, this=token.text 872 ), 873 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 874 TokenType.STRING: lambda self, token: self.expression( 875 exp.Literal, this=token.text, is_string=True 876 ), 877 TokenType.UNICODE_STRING: lambda self, token: self.expression( 878 exp.UnicodeString, 879 this=token.text, 880 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 881 ), 882 } 883 884 NUMERIC_PARSERS = { 885 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 886 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 887 TokenType.HEX_STRING: lambda self, token: self.expression( 888 exp.HexString, 889 this=token.text, 890 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 891 ), 892 TokenType.NUMBER: lambda self, token: self.expression( 893 exp.Literal, this=token.text, is_string=False 894 ), 895 } 896 897 PRIMARY_PARSERS = { 898 **STRING_PARSERS, 899 **NUMERIC_PARSERS, 900 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 901 TokenType.NULL: lambda self, _: self.expression(exp.Null), 902 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 903 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 904 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 905 TokenType.STAR: lambda self, _: self._parse_star_ops(), 906 } 907 908 PLACEHOLDER_PARSERS = { 909 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 910 TokenType.PARAMETER: lambda self: self._parse_parameter(), 911 TokenType.COLON: lambda self: ( 912 self.expression(exp.Placeholder, this=self._prev.text) 913 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 914 else None 915 ), 916 } 917 
# Range/predicate token -> callable(self, this) parsing the right-hand side.
RANGE_PARSERS = {
    TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
    TokenType.BETWEEN: lambda self, this: self._parse_between(this),
    TokenType.GLOB: binary_range_parser(exp.Glob),
    TokenType.ILIKE: binary_range_parser(exp.ILike),
    TokenType.IN: lambda self, this: self._parse_in(this),
    TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
    TokenType.IS: lambda self, this: self._parse_is(this),
    TokenType.LIKE: binary_range_parser(exp.Like),
    TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
    TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
    TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
    TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    TokenType.FOR: lambda self, this: self._parse_comprehension(this),
}

# Property keyword(s) -> parse callable; a few entries also accept **kwargs.
PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
    "ALLOWED_VALUES": lambda self: self.expression(
        exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
    ),
    "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
    "AUTO": lambda self: self._parse_auto_property(),
    "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
    "BACKUP": lambda self: self.expression(
        exp.BackupProperty, this=self._parse_var(any_token=True)
    ),
    "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
    "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
    "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
    "CHECKSUM": lambda self: self._parse_checksum(),
    "CLUSTER BY": lambda self: self._parse_cluster(),
    "CLUSTERED": lambda self: self._parse_clustered_by(),
    "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
        exp.CollateProperty, **kwargs
    ),
    "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
    "CONTAINS": lambda self: self._parse_contains_property(),
    "COPY": lambda self: self._parse_copy_property(),
    "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
    "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
    "DEFINER": lambda self: self._parse_definer(),
    "DETERMINISTIC": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
    ),
    "DISTRIBUTED": lambda self: self._parse_distributed_property(),
    "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
    "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
    "DISTKEY": lambda self: self._parse_distkey(),
    "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
    "EMPTY": lambda self: self.expression(exp.EmptyProperty),
    "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
    "ENVIRONMENT": lambda self: self.expression(
        exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
    ),
    "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
    "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
    "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
    "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "FREESPACE": lambda self: self._parse_freespace(),
    "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
    "HEAP": lambda self: self.expression(exp.HeapProperty),
    "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
    "IMMUTABLE": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
    ),
    "INHERITS": lambda self: self.expression(
        exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
    ),
    "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
    "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
    "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
    "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
    "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
    "LIKE": lambda self: self._parse_create_like(),
    "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
    "LOCK": lambda self: self._parse_locking(),
    "LOCKING": lambda self: self._parse_locking(),
    "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
    "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
    "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
    "MODIFIES": lambda self: self._parse_modifies_property(),
    "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
    "NO": lambda self: self._parse_no_property(),
    "ON": lambda self: self._parse_on_property(),
    "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
    "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
    "PARTITION": lambda self: self._parse_partitioned_of(),
    "PARTITION BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
    "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
    "READS": lambda self: self._parse_reads_property(),
    "REMOTE": lambda self: self._parse_remote_with_connection(),
    "RETURNS": lambda self: self._parse_returns(),
    "STRICT": lambda self: self.expression(exp.StrictProperty),
    "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
    "ROW": lambda self: self._parse_row(),
    "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
    "SAMPLE": lambda self: self.expression(
        exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
    ),
    "SECURE": lambda self: self.expression(exp.SecureProperty),
    "SECURITY": lambda self: self._parse_security(),
    "SET": lambda self: self.expression(exp.SetProperty, multi=False),
    "SETTINGS": lambda self: self._parse_settings_property(),
    "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
    "SORTKEY": lambda self: self._parse_sortkey(),
    "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
    "STABLE": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("STABLE")
    ),
    "STORED": lambda self: self._parse_stored(),
    "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
    "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
    "TEMP": lambda self: self.expression(exp.TemporaryProperty),
    "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
    "TO": lambda self: self._parse_to_table(),
    "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
    "TRANSFORM": lambda self: self.expression(
        exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
    ),
    "TTL": lambda self: self._parse_ttl(),
    "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
    "VOLATILE": lambda self: self._parse_volatile_property(),
    "WITH": lambda self: self._parse_with_property(),
}

# Constraint keyword(s) -> parse callable taking only `self`.
CONSTRAINT_PARSERS = {
    "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
    "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
    "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
    "CHARACTER SET": lambda self: self.expression(
        exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
    ),
    "CHECK": lambda self: self.expression(
        exp.CheckColumnConstraint,
        this=self._parse_wrapped(self._parse_assignment),
        enforced=self._match_text_seq("ENFORCED"),
    ),
    "COLLATE": lambda self: self.expression(
        exp.CollateColumnConstraint,
        this=self._parse_identifier() or self._parse_column(),
    ),
    "COMMENT": lambda self: self.expression(
        exp.CommentColumnConstraint, this=self._parse_string()
    ),
    "COMPRESS": lambda self: self._parse_compress(),
    "CLUSTERED": lambda self: self.expression(
        exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
    ),
    "NONCLUSTERED": lambda self: self.expression(
        exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
    ),
    "DEFAULT": lambda self: self.expression(
        exp.DefaultColumnConstraint, this=self._parse_bitwise()
    ),
    "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
    "EPHEMERAL": lambda self: self.expression(
        exp.EphemeralColumnConstraint, this=self._parse_bitwise()
    ),
    "EXCLUDE": lambda self: self.expression(
        exp.ExcludeColumnConstraint, this=self._parse_index_params()
    ),
    "FOREIGN KEY": lambda self: self._parse_foreign_key(),
    "FORMAT": lambda self: self.expression(
        exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
    ),
    "GENERATED": lambda self: self._parse_generated_as_identity(),
    "IDENTITY": lambda self: self._parse_auto_increment(),
    "INLINE": lambda self: self._parse_inline(),
    "LIKE": lambda self: self._parse_create_like(),
    "NOT": lambda self: self._parse_not_constraint(),
    "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
    "ON": lambda self: (
        self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
    )
    or self.expression(exp.OnProperty, this=self._parse_id_var()),
    "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
    "PERIOD": lambda self: self._parse_period_for_system_time(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(),
    "REFERENCES": lambda self: self._parse_references(match=False),
    "TITLE": lambda self: self.expression(
        exp.TitleColumnConstraint, this=self._parse_var_or_string()
    ),
    "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
    "UNIQUE": lambda self: self._parse_unique(),
    "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    "WATERMARK": lambda self: self.expression(
        exp.WatermarkColumnConstraint,
        this=self._match(TokenType.FOR) and self._parse_column(),
        expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
    ),
    "WITH": lambda self: self.expression(
        exp.Properties, expressions=self._parse_wrapped_properties()
    ),
    "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
}

def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
    """
    Parses the parenthesized arguments of a `BUCKET(...)` / `TRUNCATE(...)` partition transform.

    The node type is chosen from the text of the previously consumed token, so that
    token must be read before any argument is parsed.

    Returns:
        An `exp.PartitionedByBucket` when the previous token's text is "BUCKET"
        (case-insensitive), otherwise an `exp.PartitionByTruncate`.
    """
    if self._prev.text.upper() == "BUCKET":
        transform_type = exp.PartitionedByBucket
    else:
        transform_type = exp.PartitionByTruncate

    def _parse_argument():
        return self._parse_primary() or self._parse_column()

    parsed = self._parse_wrapped_csv(_parse_argument)
    first = seq_get(parsed, 0)
    second = seq_get(parsed, 1)

    if isinstance(first, exp.Literal):
        # Iceberg partition transforms (bucket / truncate) come in two argument orders:
        # - Hive: `bucket(<num buckets>, <col name>)` / `truncate(<num_chars>, <col_name>)`
        # - Trino: `bucket(<col name>, <num buckets>)` / `truncate(<col_name>, <num_chars>)`
        # A leading literal means the Hive order was used, so the pair is flipped and both
        # variants end up canonicalized as `bucket(<col name>, <num buckets>)`.
        #
        # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
        # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
        first, second = second, first

    return self.expression(transform_type, this=first, expression=second)

# ALTER action keyword(s) -> parse callable taking only `self`.
ALTER_PARSERS = {
    "ADD": lambda self: self._parse_alter_table_add(),
    "AS": lambda self: self._parse_select(),
    "ALTER": lambda self: self._parse_alter_table_alter(),
    "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
    "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
    "DROP": lambda self: self._parse_alter_table_drop(),
    "RENAME": lambda self: self._parse_alter_table_rename(),
    "SET": lambda self: self._parse_alter_table_set(),
    "SWAP": lambda self: self.expression(
        exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
    ),
}

ALTER_ALTER_PARSERS = {
    "DISTKEY": lambda self: self._parse_alter_diststyle(),
    "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
    "SORTKEY": lambda self: self._parse_alter_sortkey(),
    "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
}

SCHEMA_UNNAMED_CONSTRAINTS = {
    "CHECK",
    "EXCLUDE",
    "FOREIGN KEY",
    "LIKE",
    "PERIOD",
    "PRIMARY KEY",
    "UNIQUE",
    "WATERMARK",
    "BUCKET",
    "TRUNCATE",
}

NO_PAREN_FUNCTION_PARSERS = {
    "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    "CASE": lambda self: self._parse_case(),
    "CONNECT_BY_ROOT": lambda self: self.expression(
        exp.ConnectByRoot, this=self._parse_column()
    ),
    "IF": lambda self: self._parse_if(),
}

INVALID_FUNC_NAME_TOKENS = {
    TokenType.IDENTIFIER,
    TokenType.STRING,
}

FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

# Function name -> parse callable taking only `self`.
FUNCTION_PARSERS = {
    **{
        name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
    },
    **{
        name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
    },
    "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
    "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
    "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
    "DECODE": lambda self: self._parse_decode(),
    "EXTRACT": lambda self: self._parse_extract(),
    "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
    "GAP_FILL": lambda self: self._parse_gap_fill(),
    "JSON_OBJECT": lambda self: self._parse_json_object(),
    "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
    "JSON_TABLE": lambda self: self._parse_json_table(),
    "MATCH": lambda self: self._parse_match_against(),
    "NORMALIZE": lambda self: self._parse_normalize(),
    "OPENJSON": lambda self: self._parse_open_json(),
    "OVERLAY": lambda self: self._parse_overlay(),
    "POSITION": lambda self: self._parse_position(),
    "PREDICT": lambda self: self._parse_predict(),
    "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
    "STRING_AGG": lambda self: self._parse_string_agg(),
    "SUBSTRING": lambda self: self._parse_substring(),
    "TRIM": lambda self: self._parse_trim(),
    "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
    "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    "XMLELEMENT": lambda self: self.expression(
        exp.XMLElement,
        this=self._match_text_seq("NAME") and self._parse_id_var(),
        expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
    ),
    "XMLTABLE": lambda self: self._parse_xml_table(),
}

# Modifier token -> callable(self) returning a (key, parsed node) pair.
QUERY_MODIFIER_PARSERS = {
    TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
    TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
    TokenType.WHERE: lambda self: ("where", self._parse_where()),
    TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
    TokenType.HAVING: lambda self: ("having", self._parse_having()),
    TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
    TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
    TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
    TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
    TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
    TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
    TokenType.FOR: lambda self: ("locks", self._parse_locks()),
    TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
    TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.CLUSTER_BY: lambda self: (
        "cluster",
        self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
    ),
    TokenType.DISTRIBUTE_BY: lambda self: (
        "distribute",
        self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
    ),
    TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
    TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
}

SET_PARSERS = {
    "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
    "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
    "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
    "TRANSACTION": lambda self: self._parse_set_transaction(),
}

SHOW_PARSERS: t.Dict[str, t.Callable] = {}

TYPE_LITERAL_PARSERS = {
    exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
}

TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
# Leading keyword -> allowed continuations (single words or word sequences).
TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
    "ISOLATION": (
        ("LEVEL", "REPEATABLE", "READ"),
        ("LEVEL", "READ", "COMMITTED"),
        # Was misspelled "UNCOMITTED", which can never match the SQL keyword
        # sequence `ISOLATION LEVEL READ UNCOMMITTED`.
        ("LEVEL", "READ", "UNCOMMITTED"),
        ("LEVEL", "SERIALIZABLE"),
    ),
    "READ": ("WRITE", "ONLY"),
}

# Keywords mapped to an empty tuple take no continuation words.
CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
    ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
)
CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

CREATE_SEQUENCE: OPTIONS_TYPE = {
    "SCALE": ("EXTEND", "NOEXTEND"),
    "SHARD": ("EXTEND", "NOEXTEND"),
    "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
    **dict.fromkeys(
        (
            "SESSION",
            "GLOBAL",
            "KEEP",
            "NOKEEP",
            "ORDER",
            "NOORDER",
            "NOCACHE",
            "CYCLE",
            "NOCYCLE",
            "NOMINVALUE",
            "NOMAXVALUE",
            "NOSCALE",
            "NOSHARD",
        ),
        tuple(),
    ),
}

ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

USABLES: OPTIONS_TYPE = dict.fromkeys(
    ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
)

CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
    "TYPE": ("EVOLUTION",),
    **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
}

PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
    "NOT": ("ENFORCED",),
    "MATCH": (
        "FULL",
        "PARTIAL",
        "SIMPLE",
    ),
    "INITIALLY": ("DEFERRED", "IMMEDIATE"),
    "USING": (
        "BTREE",
        "HASH",
    ),
    **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
}

WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
    "NO": ("OTHERS",),
    "CURRENT": ("ROW",),
    **dict.fromkeys(("GROUP", "TIES"), tuple()),
}

INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

CLONE_KEYWORDS = {"CLONE", "COPY"}
HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

ADD_CONSTRAINT_TOKENS = {
    TokenType.CONSTRAINT,
    TokenType.FOREIGN_KEY,
    TokenType.INDEX,
    TokenType.KEY,
    TokenType.PRIMARY_KEY,
    TokenType.UNIQUE,
}

DISTINCT_TOKENS = {TokenType.DISTINCT}

NULL_TOKENS = {TokenType.NULL}

UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

ODBC_DATETIME_LITERALS = {
    "d": exp.Date,
    "t": exp.Time,
    "ts": exp.Timestamp,
}

ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

# The style options for the DESCRIBE statement
DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

# The style options for the ANALYZE statement
ANALYZE_STYLES = {
    "BUFFER_USAGE_LIMIT",
    "FULL",
    "LOCAL",
    "NO_WRITE_TO_BINLOG",
    "SAMPLE",
    "SKIP_LOCKED",
    "VERBOSE",
}

ANALYZE_EXPRESSION_PARSERS = {
    "ALL": lambda self: self._parse_analyze_columns(),
    "COMPUTE": lambda self: self._parse_analyze_statistics(),
    "DELETE": lambda self: self._parse_analyze_delete(),
    "DROP": lambda self: self._parse_analyze_histogram(),
    "ESTIMATE": lambda self: self._parse_analyze_statistics(),
    "LIST": lambda self: self._parse_analyze_list(),
    "PREDICATE": lambda self: self._parse_analyze_columns(),
    "UPDATE": lambda self: self._parse_analyze_histogram(),
    "VALIDATE": lambda self: self._parse_analyze_validate(),
}

PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

OPERATION_MODIFIERS: t.Set[str] = set()

RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

# When True, `::` and CAST/CONVERT build exp.Cast; otherwise exp.TryCast is used for `::`
STRICT_CAST = True

PREFIXED_PIVOT_COLUMNS = False
IDENTIFY_PIVOT_STRINGS = False

LOG_DEFAULTS_TO_LN = False

# Whether ADD is present for each column added by ALTER TABLE
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

# Whether the table sample clause expects CSV syntax
TABLESAMPLE_CSV = False

# The default method used for table sampling
DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

# Whether the SET command needs a delimiter (e.g. "=") for assignments
SET_REQUIRES_ASSIGNMENT_DELIMITER = True

# Whether the TRIM function expects the characters to trim as its first argument
TRIM_PATTERN_FIRST = False

# Whether string aliases are supported `SELECT COUNT(*) 'count'`
STRING_ALIASES = False

# Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
MODIFIERS_ATTACHED_TO_SET_OP = True
SET_OP_MODIFIERS = {"order", "limit", "offset"}

# Whether to parse IF statements that aren't followed by a left parenthesis as commands
NO_PAREN_IF_COMMANDS = True

# Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
JSON_ARROWS_REQUIRE_JSON_TYPE = False

# Whether the `:` operator is used to extract a value from a VARIANT column
COLON_IS_VARIANT_EXTRACT = False

# Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
# If this is True and '(' is not found, the keyword will be treated as an identifier
VALUES_FOLLOWED_BY_PAREN = True

# Whether implicit unnesting is supported, e.g.
def reset(self):
    """Restores the parser to its initial state, discarding anything left from a previous parse."""
    # The SQL text being parsed and the errors recorded for it
    self.sql = ""
    self.errors = []

    # The token stream and the cursor into it
    self._tokens = []
    self._index = 0

    # Look-around tokens (and the previous token's comments): nothing has been consumed yet
    self._curr = self._next = self._prev = self._prev_comments = None
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.

    Raises:
        TypeError: If one of the requested types has no entry in `EXPRESSION_PARSERS`.
        ParseError: If the tokens could not be parsed into any of the requested types
            (including the case where no type was requested at all).
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Record which target type this failure belongs to. A ParseError may carry no
            # structured details, in which case there is nothing to annotate.
            if e.errors:
                e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # `errors` is empty when no expression type was supplied; indexing it unconditionally
    # would surface an unrelated IndexError instead of the ParseError below.
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from (errors[-1] if errors else None)
def check_errors(self) -> None:
    """
    Logs or raises any found errors, depending on the chosen error level setting.

    Raises:
        ParseError: If the error level is `ErrorLevel.RAISE` and at least one error was recorded.
    """
    level = self.error_level

    if level == ErrorLevel.WARN:
        # Report each recorded error through the module logger; parsing is not interrupted
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    if level == ErrorLevel.RAISE and self.errors:
        # Surface all recorded errors at once as a single exception
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
def _is_connected(self) -> bool:
    """
    Checks whether the current token starts right where the previous token ends.

    Returns:
        True if both a previous and a current token exist and the current token's start
        offset is exactly one past the previous token's end offset, False otherwise.
    """
    prev, curr = self._prev, self._curr

    # Coerce explicitly: a bare `and` chain would hand back None (or a token object) when
    # either side is missing, which contradicts the declared `bool` return type.
    return bool(prev and curr and prev.end + 1 == curr.start)
def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
    """
    Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
    This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
    solve this by setting & resetting the parser state accordingly.

    Args:
        parse_method: The zero-argument parse function to attempt.
        retreat: Whether to rewind to the starting token even if parsing succeeds.

    Returns:
        Whatever `parse_method` returned, or None if it raised a ParseError.
    """
    index = self._index
    error_level = self.error_level

    # Bind the result up front: the `finally` clause reads it, and if `parse_method` raised
    # anything other than ParseError an unbound name there would replace the real exception
    # with an UnboundLocalError.
    this = None

    # Force errors to be raised right away so that a failure can be caught below
    self.error_level = ErrorLevel.IMMEDIATE
    try:
        this = parse_method()
    except ParseError:
        this = None
    finally:
        # Rewind on failure (or when asked to) and always restore the caller's error level
        if not this or retreat:
            self._retreat(index)
        self.error_level = error_level

    return this
# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
def _parse_ttl(self) -> exp.Expression:
    """
    Parses a MergeTree TTL clause: a comma-separated list of TTL actions, optionally followed
    by a WHERE clause, a GROUP BY clause and, when grouped, a SET list of aggregates.

    Returns:
        An `exp.MergeTreeTTL` node holding the parsed pieces.
    """

    def _parse_ttl_action() -> t.Optional[exp.Expression]:
        # Each action is an expression optionally followed by a single action keyword
        target = self._parse_bitwise()

        if self._match_text_seq("DELETE"):
            action = {"delete": True}
        elif self._match_text_seq("RECOMPRESS"):
            action = {"recompress": self._parse_bitwise()}
        elif self._match_text_seq("TO", "DISK"):
            action = {"to_disk": self._parse_string()}
        elif self._match_text_seq("TO", "VOLUME"):
            action = {"to_volume": self._parse_string()}
        else:
            # No action keyword: the bare expression is the result
            return target

        return self.expression(exp.MergeTreeTTLAction, this=target, **action)

    ttl_actions = self._parse_csv(_parse_ttl_action)
    where = self._parse_where()
    group = self._parse_group()

    # SET is only looked for when a GROUP BY clause was parsed
    if group and self._match(TokenType.SET):
        aggregates = self._parse_csv(self._parse_set_item)
    else:
        aggregates = None

    return self.expression(
        exp.MergeTreeTTL,
        expressions=ttl_actions,
        where=where,
        group=group,
        aggregates=aggregates,
    )
def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
    """
    Parses a DROP statement; the DROP keyword itself has already been consumed.

    Args:
        exists: Whether an existence check should be recorded even if no IF EXISTS is parsed here.

    Returns:
        An `exp.Drop` node, or the result of `_parse_as_command` when the dropped object's
        kind is not one of `CREATABLES`.
    """
    start = self._prev
    temporary = self._match(TokenType.TEMPORARY)
    materialized = self._match_text_seq("MATERIALIZED")

    kind_token = self._match_set(self.CREATABLES) and self._prev
    if not kind_token:
        return self._parse_as_command(start)

    kind = kind_token.text.upper()

    # Decide this from the kind token itself. Reading `self._prev` later is unreliable: once
    # CONCURRENTLY or IF EXISTS has been consumed it no longer points at the kind token, which
    # made `DROP SCHEMA IF EXISTS x` parse differently from `DROP SCHEMA x`.
    is_db_reference = kind_token.token_type == TokenType.SCHEMA

    concurrently = self._match_text_seq("CONCURRENTLY")
    if_exists = exists or self._parse_exists()

    if kind == "COLUMN":
        this = self._parse_column()
    else:
        this = self._parse_table_parts(schema=True, is_db_reference=is_db_reference)

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    # An optional parenthesized list of types may follow the dropped object's name
    if self._match(TokenType.L_PAREN, advance=False):
        expressions = self._parse_wrapped_csv(self._parse_types)
    else:
        expressions = None

    # The trailing keywords are matched in the order the keyword arguments are evaluated
    return self.expression(
        exp.Drop,
        exists=if_exists,
        this=this,
        expressions=expressions,
        kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
        temporary=temporary,
        materialized=materialized,
        cascade=self._match_text_seq("CASCADE"),
        constraints=self._match_text_seq("CONSTRAINTS"),
        purge=self._match_text_seq("PURGE"),
        cluster=cluster,
        concurrently=concurrently,
    )
a statement parser 1924 start = self._prev 1925 1926 replace = ( 1927 start.token_type == TokenType.REPLACE 1928 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1929 or self._match_pair(TokenType.OR, TokenType.ALTER) 1930 ) 1931 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1932 1933 unique = self._match(TokenType.UNIQUE) 1934 1935 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1936 clustered = True 1937 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1938 "COLUMNSTORE" 1939 ): 1940 clustered = False 1941 else: 1942 clustered = None 1943 1944 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1945 self._advance() 1946 1947 properties = None 1948 create_token = self._match_set(self.CREATABLES) and self._prev 1949 1950 if not create_token: 1951 # exp.Properties.Location.POST_CREATE 1952 properties = self._parse_properties() 1953 create_token = self._match_set(self.CREATABLES) and self._prev 1954 1955 if not properties or not create_token: 1956 return self._parse_as_command(start) 1957 1958 concurrently = self._match_text_seq("CONCURRENTLY") 1959 exists = self._parse_exists(not_=True) 1960 this = None 1961 expression: t.Optional[exp.Expression] = None 1962 indexes = None 1963 no_schema_binding = None 1964 begin = None 1965 end = None 1966 clone = None 1967 1968 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1969 nonlocal properties 1970 if properties and temp_props: 1971 properties.expressions.extend(temp_props.expressions) 1972 elif temp_props: 1973 properties = temp_props 1974 1975 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1976 this = self._parse_user_defined_function(kind=create_token.token_type) 1977 1978 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1979 extend_props(self._parse_properties()) 1980 1981 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1982 
extend_props(self._parse_properties()) 1983 1984 if not expression: 1985 if self._match(TokenType.COMMAND): 1986 expression = self._parse_as_command(self._prev) 1987 else: 1988 begin = self._match(TokenType.BEGIN) 1989 return_ = self._match_text_seq("RETURN") 1990 1991 if self._match(TokenType.STRING, advance=False): 1992 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1993 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1994 expression = self._parse_string() 1995 extend_props(self._parse_properties()) 1996 else: 1997 expression = self._parse_user_defined_function_expression() 1998 1999 end = self._match_text_seq("END") 2000 2001 if return_: 2002 expression = self.expression(exp.Return, this=expression) 2003 elif create_token.token_type == TokenType.INDEX: 2004 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2005 if not self._match(TokenType.ON): 2006 index = self._parse_id_var() 2007 anonymous = False 2008 else: 2009 index = None 2010 anonymous = True 2011 2012 this = self._parse_index(index=index, anonymous=anonymous) 2013 elif create_token.token_type in self.DB_CREATABLES: 2014 table_parts = self._parse_table_parts( 2015 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2016 ) 2017 2018 # exp.Properties.Location.POST_NAME 2019 self._match(TokenType.COMMA) 2020 extend_props(self._parse_properties(before=True)) 2021 2022 this = self._parse_schema(this=table_parts) 2023 2024 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2025 extend_props(self._parse_properties()) 2026 2027 has_alias = self._match(TokenType.ALIAS) 2028 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2029 # exp.Properties.Location.POST_ALIAS 2030 extend_props(self._parse_properties()) 2031 2032 if create_token.token_type == TokenType.SEQUENCE: 2033 expression = self._parse_types() 2034 extend_props(self._parse_properties()) 
def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
    """
    Parses a run of sequence options (INCREMENT, MINVALUE, MAXVALUE, START, CACHE, OWNED BY
    and the keywords listed in `CREATE_SEQUENCE`).

    Returns:
        The populated `exp.SequenceProperties` node, or None if no token was consumed.
    """
    seq = exp.SequenceProperties()

    options = []
    start_index = self._index

    while self._curr:
        # A comma may precede each option
        self._match(TokenType.COMMA)

        if self._match_text_seq("INCREMENT"):
            self._match_text_seq("BY")
            self._match_text_seq("=")
            seq.set("increment", self._parse_term())
            continue

        if self._match_text_seq("MINVALUE"):
            seq.set("minvalue", self._parse_term())
            continue

        if self._match_text_seq("MAXVALUE"):
            seq.set("maxvalue", self._parse_term())
            continue

        if self._match(TokenType.START_WITH) or self._match_text_seq("START"):
            self._match_text_seq("=")
            seq.set("start", self._parse_term())
            continue

        if self._match_text_seq("CACHE"):
            # T-SQL allows empty CACHE which is initialized dynamically
            seq.set("cache", self._parse_number() or True)
            continue

        if self._match_text_seq("OWNED", "BY"):
            # "OWNED BY NONE" is the default
            owner = None if self._match_text_seq("NONE") else self._parse_column()
            seq.set("owned", owner)
            continue

        # Anything else must be one of the plain keyword options, otherwise we're done
        opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
        if not opt:
            break

        options.append(opt)

    seq.set("options", options or None)

    if self._index == start_index:
        return None

    return seq
def _parse_property(self) -> t.Optional[exp.Expression]:
    """
    Parses a single property.

    Registered `PROPERTY_PARSERS` are tried first (also when prefixed by DEFAULT), then a
    couple of multi-word properties, then a generic `key = value` pair. If no `=` follows
    the key, the tokens are re-read as sequence properties.

    Returns:
        The parsed property, or whatever the fallback sequence-properties parser yields.
    """
    parsers = self.PROPERTY_PARSERS

    if self._match_texts(parsers):
        property_parser = parsers[self._prev.text.upper()]
        return property_parser(self)

    if self._match(TokenType.DEFAULT) and self._match_texts(parsers):
        property_parser = parsers[self._prev.text.upper()]
        return property_parser(self, default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("SQL", "SECURITY"):
        is_definer = self._match_text_seq("DEFINER")
        return self.expression(exp.SqlSecurityProperty, definer=is_definer)

    key_start = self._index
    key = self._parse_column()

    if not self._match(TokenType.EQ):
        # Not a `key = value` pair: rewind and try reading sequence properties instead
        self._retreat(key_start)
        return self._parse_sequence_properties()

    # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
    if isinstance(key, exp.Column):
        if len(key.parts) > 1:
            key = key.to_dot()
        else:
            key = exp.var(key.name)

    value = self._parse_bitwise()
    if not value:
        value = self._parse_var(any_token=True)

    # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
    if isinstance(value, exp.Column):
        value = exp.var(value.name)

    return self.expression(exp.Property, this=key, value=value)
def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
    """
    Parses consecutive properties until the property parser stops producing results.

    Args:
        before: If truthy, each property is read with `_parse_property_before` instead of
            `_parse_property`.

    Returns:
        An `exp.Properties` node wrapping everything that was parsed, or None if nothing was.
    """
    # The choice of parser does not change between iterations, so make it once
    parse_one = self._parse_property_before if before else self._parse_property

    properties = []
    while True:
        prop = parse_one()
        if not prop:
            break

        # A single call may yield either one property or a list of them
        properties.extend(ensure_list(prop))

    if properties:
        return self.expression(exp.Properties, expressions=properties)

    return None
def _parse_system_versioning_property(
    self, with_: bool = False
) -> exp.WithSystemVersioningProperty:
    """
    Parses the value of a SYSTEM_VERSIONING property: an optional `=`, then OFF, or ON
    optionally followed by a parenthesized option list.

    Args:
        with_: Stored under the node's "with" arg.

    Returns:
        The `exp.WithSystemVersioningProperty` node, with "on" set to False for OFF and
        True otherwise.
    """
    self._match(TokenType.EQ)
    prop = self.expression(
        exp.WithSystemVersioningProperty,
        **{  # type: ignore
            "on": True,
            "with": with_,
        },
    )

    if self._match_text_seq("OFF"):
        prop.set("on", False)
        return prop

    self._match(TokenType.ON)
    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            index = self._index

            if self._match_text_seq("HISTORY_TABLE", "="):
                prop.set("this", self._parse_table_parts())
            elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
            elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            if self._index == index:
                # Unrecognized option: nothing was consumed, so another iteration would see
                # the exact same token and loop forever. Stop and leave the remaining
                # tokens for the caller to deal with.
                break

    return prop
def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
    """
    Parses an optional KEY keyword followed by a wrapped list of identifiers.

    Args:
        expr_type: The expression class to instantiate with the parsed identifiers.

    Returns:
        An instance of `expr_type` whose `expressions` arg holds the identifiers.
    """
    # The KEY keyword is consumed if present; its absence is not an error here
    self._match_text_seq("KEY")

    return self.expression(expr_type, expressions=self._parse_wrapped_id_vars())
# https://dev.mysql.com/doc/refman/8.0/en/create-view.html
def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
    """
    Parses a DEFINER value made of a user part and a host part.

    Returns:
        An `exp.DefinerProperty` whose `this` is the string "<user>@<host>", or None if
        either part is missing.
    """
    self._match(TokenType.EQ)

    user = self._parse_id_var()

    # A PARAMETER token may sit between the user and the host
    self._match(TokenType.PARAMETER)

    host = self._parse_id_var()
    if not host:
        # Fall back to the text of a MOD token when the host is not an identifier
        host = self._match(TokenType.MOD) and self._prev.text

    if user and host:
        return exp.DefinerProperty(this=f"{user}@{host}")

    return None
def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
    """
    Parses the GRANTS keyword into a copy-grants property.

    Returns:
        An `exp.CopyGrantsProperty` node if GRANTS is the next token. Otherwise the parser
        is moved back by one token and None is returned.
    """
    if self._match_text_seq("GRANTS"):
        return self.expression(exp.CopyGrantsProperty)

    # Not followed by GRANTS: step back one token so it can be parsed some other way
    self._retreat(self._index - 1)
    return None
2487 ) 2488 2489 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2490 2491 def _parse_datablocksize( 2492 self, 2493 default: t.Optional[bool] = None, 2494 minimum: t.Optional[bool] = None, 2495 maximum: t.Optional[bool] = None, 2496 ) -> exp.DataBlocksizeProperty: 2497 self._match(TokenType.EQ) 2498 size = self._parse_number() 2499 2500 units = None 2501 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2502 units = self._prev.text 2503 2504 return self.expression( 2505 exp.DataBlocksizeProperty, 2506 size=size, 2507 units=units, 2508 default=default, 2509 minimum=minimum, 2510 maximum=maximum, 2511 ) 2512 2513 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2514 self._match(TokenType.EQ) 2515 always = self._match_text_seq("ALWAYS") 2516 manual = self._match_text_seq("MANUAL") 2517 never = self._match_text_seq("NEVER") 2518 default = self._match_text_seq("DEFAULT") 2519 2520 autotemp = None 2521 if self._match_text_seq("AUTOTEMP"): 2522 autotemp = self._parse_schema() 2523 2524 return self.expression( 2525 exp.BlockCompressionProperty, 2526 always=always, 2527 manual=manual, 2528 never=never, 2529 default=default, 2530 autotemp=autotemp, 2531 ) 2532 2533 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2534 index = self._index 2535 no = self._match_text_seq("NO") 2536 concurrent = self._match_text_seq("CONCURRENT") 2537 2538 if not self._match_text_seq("ISOLATED", "LOADING"): 2539 self._retreat(index) 2540 return None 2541 2542 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2543 return self.expression( 2544 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2545 ) 2546 2547 def _parse_locking(self) -> exp.LockingProperty: 2548 if self._match(TokenType.TABLE): 2549 kind = "TABLE" 2550 elif self._match(TokenType.VIEW): 2551 kind = "VIEW" 2552 elif self._match(TokenType.ROW): 2553 kind = "ROW" 2554 elif 
self._match_text_seq("DATABASE"): 2555 kind = "DATABASE" 2556 else: 2557 kind = None 2558 2559 if kind in ("DATABASE", "TABLE", "VIEW"): 2560 this = self._parse_table_parts() 2561 else: 2562 this = None 2563 2564 if self._match(TokenType.FOR): 2565 for_or_in = "FOR" 2566 elif self._match(TokenType.IN): 2567 for_or_in = "IN" 2568 else: 2569 for_or_in = None 2570 2571 if self._match_text_seq("ACCESS"): 2572 lock_type = "ACCESS" 2573 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2574 lock_type = "EXCLUSIVE" 2575 elif self._match_text_seq("SHARE"): 2576 lock_type = "SHARE" 2577 elif self._match_text_seq("READ"): 2578 lock_type = "READ" 2579 elif self._match_text_seq("WRITE"): 2580 lock_type = "WRITE" 2581 elif self._match_text_seq("CHECKSUM"): 2582 lock_type = "CHECKSUM" 2583 else: 2584 lock_type = None 2585 2586 override = self._match_text_seq("OVERRIDE") 2587 2588 return self.expression( 2589 exp.LockingProperty, 2590 this=this, 2591 kind=kind, 2592 for_or_in=for_or_in, 2593 lock_type=lock_type, 2594 override=override, 2595 ) 2596 2597 def _parse_partition_by(self) -> t.List[exp.Expression]: 2598 if self._match(TokenType.PARTITION_BY): 2599 return self._parse_csv(self._parse_assignment) 2600 return [] 2601 2602 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2603 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2604 if self._match_text_seq("MINVALUE"): 2605 return exp.var("MINVALUE") 2606 if self._match_text_seq("MAXVALUE"): 2607 return exp.var("MAXVALUE") 2608 return self._parse_bitwise() 2609 2610 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2611 expression = None 2612 from_expressions = None 2613 to_expressions = None 2614 2615 if self._match(TokenType.IN): 2616 this = self._parse_wrapped_csv(self._parse_bitwise) 2617 elif self._match(TokenType.FROM): 2618 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2619 self._match_text_seq("TO") 2620 to_expressions = 
self._parse_wrapped_csv(_parse_partition_bound_expr) 2621 elif self._match_text_seq("WITH", "(", "MODULUS"): 2622 this = self._parse_number() 2623 self._match_text_seq(",", "REMAINDER") 2624 expression = self._parse_number() 2625 self._match_r_paren() 2626 else: 2627 self.raise_error("Failed to parse partition bound spec.") 2628 2629 return self.expression( 2630 exp.PartitionBoundSpec, 2631 this=this, 2632 expression=expression, 2633 from_expressions=from_expressions, 2634 to_expressions=to_expressions, 2635 ) 2636 2637 # https://www.postgresql.org/docs/current/sql-createtable.html 2638 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2639 if not self._match_text_seq("OF"): 2640 self._retreat(self._index - 1) 2641 return None 2642 2643 this = self._parse_table(schema=True) 2644 2645 if self._match(TokenType.DEFAULT): 2646 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2647 elif self._match_text_seq("FOR", "VALUES"): 2648 expression = self._parse_partition_bound_spec() 2649 else: 2650 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2651 2652 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2653 2654 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2655 self._match(TokenType.EQ) 2656 return self.expression( 2657 exp.PartitionedByProperty, 2658 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2659 ) 2660 2661 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2662 if self._match_text_seq("AND", "STATISTICS"): 2663 statistics = True 2664 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2665 statistics = False 2666 else: 2667 statistics = None 2668 2669 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2670 2671 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2672 if self._match_text_seq("SQL"): 2673 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS 
SQL") 2674 return None 2675 2676 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2677 if self._match_text_seq("SQL", "DATA"): 2678 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2679 return None 2680 2681 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2682 if self._match_text_seq("PRIMARY", "INDEX"): 2683 return exp.NoPrimaryIndexProperty() 2684 if self._match_text_seq("SQL"): 2685 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2686 return None 2687 2688 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2689 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2690 return exp.OnCommitProperty() 2691 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2692 return exp.OnCommitProperty(delete=True) 2693 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2694 2695 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2696 if self._match_text_seq("SQL", "DATA"): 2697 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2698 return None 2699 2700 def _parse_distkey(self) -> exp.DistKeyProperty: 2701 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2702 2703 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2704 table = self._parse_table(schema=True) 2705 2706 options = [] 2707 while self._match_texts(("INCLUDING", "EXCLUDING")): 2708 this = self._prev.text.upper() 2709 2710 id_var = self._parse_id_var() 2711 if not id_var: 2712 return None 2713 2714 options.append( 2715 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2716 ) 2717 2718 return self.expression(exp.LikeProperty, this=table, expressions=options) 2719 2720 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2721 return self.expression( 2722 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2723 ) 2724 2725 def 
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2726 self._match(TokenType.EQ) 2727 return self.expression( 2728 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2729 ) 2730 2731 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2732 self._match_text_seq("WITH", "CONNECTION") 2733 return self.expression( 2734 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2735 ) 2736 2737 def _parse_returns(self) -> exp.ReturnsProperty: 2738 value: t.Optional[exp.Expression] 2739 null = None 2740 is_table = self._match(TokenType.TABLE) 2741 2742 if is_table: 2743 if self._match(TokenType.LT): 2744 value = self.expression( 2745 exp.Schema, 2746 this="TABLE", 2747 expressions=self._parse_csv(self._parse_struct_types), 2748 ) 2749 if not self._match(TokenType.GT): 2750 self.raise_error("Expecting >") 2751 else: 2752 value = self._parse_schema(exp.var("TABLE")) 2753 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2754 null = True 2755 value = None 2756 else: 2757 value = self._parse_types() 2758 2759 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2760 2761 def _parse_describe(self) -> exp.Describe: 2762 kind = self._match_set(self.CREATABLES) and self._prev.text 2763 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2764 if self._match(TokenType.DOT): 2765 style = None 2766 self._retreat(self._index - 2) 2767 2768 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2769 2770 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2771 this = self._parse_statement() 2772 else: 2773 this = self._parse_table(schema=True) 2774 2775 properties = self._parse_properties() 2776 expressions = properties.expressions if properties else None 2777 partition = self._parse_partition() 2778 return self.expression( 2779 exp.Describe, 2780 this=this, 2781 style=style, 2782 
kind=kind, 2783 expressions=expressions, 2784 partition=partition, 2785 format=format, 2786 ) 2787 2788 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2789 kind = self._prev.text.upper() 2790 expressions = [] 2791 2792 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2793 if self._match(TokenType.WHEN): 2794 expression = self._parse_disjunction() 2795 self._match(TokenType.THEN) 2796 else: 2797 expression = None 2798 2799 else_ = self._match(TokenType.ELSE) 2800 2801 if not self._match(TokenType.INTO): 2802 return None 2803 2804 return self.expression( 2805 exp.ConditionalInsert, 2806 this=self.expression( 2807 exp.Insert, 2808 this=self._parse_table(schema=True), 2809 expression=self._parse_derived_table_values(), 2810 ), 2811 expression=expression, 2812 else_=else_, 2813 ) 2814 2815 expression = parse_conditional_insert() 2816 while expression is not None: 2817 expressions.append(expression) 2818 expression = parse_conditional_insert() 2819 2820 return self.expression( 2821 exp.MultitableInserts, 2822 kind=kind, 2823 comments=comments, 2824 expressions=expressions, 2825 source=self._parse_table(), 2826 ) 2827 2828 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2829 comments = [] 2830 hint = self._parse_hint() 2831 overwrite = self._match(TokenType.OVERWRITE) 2832 ignore = self._match(TokenType.IGNORE) 2833 local = self._match_text_seq("LOCAL") 2834 alternative = None 2835 is_function = None 2836 2837 if self._match_text_seq("DIRECTORY"): 2838 this: t.Optional[exp.Expression] = self.expression( 2839 exp.Directory, 2840 this=self._parse_var_or_string(), 2841 local=local, 2842 row_format=self._parse_row_format(match_row=True), 2843 ) 2844 else: 2845 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2846 comments += ensure_list(self._prev_comments) 2847 return self._parse_multitable_inserts(comments) 2848 2849 if self._match(TokenType.OR): 2850 alternative = 
self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2851 2852 self._match(TokenType.INTO) 2853 comments += ensure_list(self._prev_comments) 2854 self._match(TokenType.TABLE) 2855 is_function = self._match(TokenType.FUNCTION) 2856 2857 this = ( 2858 self._parse_table(schema=True, parse_partition=True) 2859 if not is_function 2860 else self._parse_function() 2861 ) 2862 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2863 this.set("alias", self._parse_table_alias()) 2864 2865 returning = self._parse_returning() 2866 2867 return self.expression( 2868 exp.Insert, 2869 comments=comments, 2870 hint=hint, 2871 is_function=is_function, 2872 this=this, 2873 stored=self._match_text_seq("STORED") and self._parse_stored(), 2874 by_name=self._match_text_seq("BY", "NAME"), 2875 exists=self._parse_exists(), 2876 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2877 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2878 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2879 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2880 conflict=self._parse_on_conflict(), 2881 returning=returning or self._parse_returning(), 2882 overwrite=overwrite, 2883 alternative=alternative, 2884 ignore=ignore, 2885 source=self._match(TokenType.TABLE) and self._parse_table(), 2886 ) 2887 2888 def _parse_kill(self) -> exp.Kill: 2889 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2890 2891 return self.expression( 2892 exp.Kill, 2893 this=self._parse_primary(), 2894 kind=kind, 2895 ) 2896 2897 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2898 conflict = self._match_text_seq("ON", "CONFLICT") 2899 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2900 2901 if not conflict and not duplicate: 2902 return None 2903 2904 conflict_keys = None 2905 constraint = None 2906 2907 if 
conflict: 2908 if self._match_text_seq("ON", "CONSTRAINT"): 2909 constraint = self._parse_id_var() 2910 elif self._match(TokenType.L_PAREN): 2911 conflict_keys = self._parse_csv(self._parse_id_var) 2912 self._match_r_paren() 2913 2914 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2915 if self._prev.token_type == TokenType.UPDATE: 2916 self._match(TokenType.SET) 2917 expressions = self._parse_csv(self._parse_equality) 2918 else: 2919 expressions = None 2920 2921 return self.expression( 2922 exp.OnConflict, 2923 duplicate=duplicate, 2924 expressions=expressions, 2925 action=action, 2926 conflict_keys=conflict_keys, 2927 constraint=constraint, 2928 where=self._parse_where(), 2929 ) 2930 2931 def _parse_returning(self) -> t.Optional[exp.Returning]: 2932 if not self._match(TokenType.RETURNING): 2933 return None 2934 return self.expression( 2935 exp.Returning, 2936 expressions=self._parse_csv(self._parse_expression), 2937 into=self._match(TokenType.INTO) and self._parse_table_part(), 2938 ) 2939 2940 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2941 if not self._match(TokenType.FORMAT): 2942 return None 2943 return self._parse_row_format() 2944 2945 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2946 index = self._index 2947 with_ = with_ or self._match_text_seq("WITH") 2948 2949 if not self._match(TokenType.SERDE_PROPERTIES): 2950 self._retreat(index) 2951 return None 2952 return self.expression( 2953 exp.SerdeProperties, 2954 **{ # type: ignore 2955 "expressions": self._parse_wrapped_properties(), 2956 "with": with_, 2957 }, 2958 ) 2959 2960 def _parse_row_format( 2961 self, match_row: bool = False 2962 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2963 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2964 return None 2965 2966 if self._match_text_seq("SERDE"): 2967 this = self._parse_string() 2968 2969 
serde_properties = self._parse_serde_properties() 2970 2971 return self.expression( 2972 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2973 ) 2974 2975 self._match_text_seq("DELIMITED") 2976 2977 kwargs = {} 2978 2979 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2980 kwargs["fields"] = self._parse_string() 2981 if self._match_text_seq("ESCAPED", "BY"): 2982 kwargs["escaped"] = self._parse_string() 2983 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2984 kwargs["collection_items"] = self._parse_string() 2985 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2986 kwargs["map_keys"] = self._parse_string() 2987 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2988 kwargs["lines"] = self._parse_string() 2989 if self._match_text_seq("NULL", "DEFINED", "AS"): 2990 kwargs["null"] = self._parse_string() 2991 2992 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2993 2994 def _parse_load(self) -> exp.LoadData | exp.Command: 2995 if self._match_text_seq("DATA"): 2996 local = self._match_text_seq("LOCAL") 2997 self._match_text_seq("INPATH") 2998 inpath = self._parse_string() 2999 overwrite = self._match(TokenType.OVERWRITE) 3000 self._match_pair(TokenType.INTO, TokenType.TABLE) 3001 3002 return self.expression( 3003 exp.LoadData, 3004 this=self._parse_table(schema=True), 3005 local=local, 3006 overwrite=overwrite, 3007 inpath=inpath, 3008 partition=self._parse_partition(), 3009 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3010 serde=self._match_text_seq("SERDE") and self._parse_string(), 3011 ) 3012 return self._parse_as_command(self._prev) 3013 3014 def _parse_delete(self) -> exp.Delete: 3015 # This handles MySQL's "Multiple-Table Syntax" 3016 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3017 tables = None 3018 if not self._match(TokenType.FROM, advance=False): 3019 tables = self._parse_csv(self._parse_table) or None 3020 3021 
returning = self._parse_returning() 3022 3023 return self.expression( 3024 exp.Delete, 3025 tables=tables, 3026 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3027 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3028 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3029 where=self._parse_where(), 3030 returning=returning or self._parse_returning(), 3031 limit=self._parse_limit(), 3032 ) 3033 3034 def _parse_update(self) -> exp.Update: 3035 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3036 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3037 returning = self._parse_returning() 3038 return self.expression( 3039 exp.Update, 3040 **{ # type: ignore 3041 "this": this, 3042 "expressions": expressions, 3043 "from": self._parse_from(joins=True), 3044 "where": self._parse_where(), 3045 "returning": returning or self._parse_returning(), 3046 "order": self._parse_order(), 3047 "limit": self._parse_limit(), 3048 }, 3049 ) 3050 3051 def _parse_use(self) -> exp.Use: 3052 return self.expression( 3053 exp.Use, 3054 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3055 this=self._parse_table(schema=False), 3056 ) 3057 3058 def _parse_uncache(self) -> exp.Uncache: 3059 if not self._match(TokenType.TABLE): 3060 self.raise_error("Expecting TABLE after UNCACHE") 3061 3062 return self.expression( 3063 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3064 ) 3065 3066 def _parse_cache(self) -> exp.Cache: 3067 lazy = self._match_text_seq("LAZY") 3068 self._match(TokenType.TABLE) 3069 table = self._parse_table(schema=True) 3070 3071 options = [] 3072 if self._match_text_seq("OPTIONS"): 3073 self._match_l_paren() 3074 k = self._parse_string() 3075 self._match(TokenType.EQ) 3076 v = self._parse_string() 3077 options = [k, v] 3078 self._match_r_paren() 3079 3080 self._match(TokenType.ALIAS) 3081 return self.expression( 
3082 exp.Cache, 3083 this=table, 3084 lazy=lazy, 3085 options=options, 3086 expression=self._parse_select(nested=True), 3087 ) 3088 3089 def _parse_partition(self) -> t.Optional[exp.Partition]: 3090 if not self._match_texts(self.PARTITION_KEYWORDS): 3091 return None 3092 3093 return self.expression( 3094 exp.Partition, 3095 subpartition=self._prev.text.upper() == "SUBPARTITION", 3096 expressions=self._parse_wrapped_csv(self._parse_assignment), 3097 ) 3098 3099 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3100 def _parse_value_expression() -> t.Optional[exp.Expression]: 3101 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3102 return exp.var(self._prev.text.upper()) 3103 return self._parse_expression() 3104 3105 if self._match(TokenType.L_PAREN): 3106 expressions = self._parse_csv(_parse_value_expression) 3107 self._match_r_paren() 3108 return self.expression(exp.Tuple, expressions=expressions) 3109 3110 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
3111 expression = self._parse_expression() 3112 if expression: 3113 return self.expression(exp.Tuple, expressions=[expression]) 3114 return None 3115 3116 def _parse_projections(self) -> t.List[exp.Expression]: 3117 return self._parse_expressions() 3118 3119 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3120 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3121 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3122 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3123 ) 3124 elif self._match(TokenType.FROM): 3125 from_ = self._parse_from(skip_from_token=True) 3126 # Support parentheses for duckdb FROM-first syntax 3127 select = self._parse_select() 3128 if select: 3129 select.set("from", from_) 3130 this = select 3131 else: 3132 this = exp.select("*").from_(t.cast(exp.From, from_)) 3133 else: 3134 this = ( 3135 self._parse_table() 3136 if table 3137 else self._parse_select(nested=True, parse_set_operation=False) 3138 ) 3139 3140 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3141 # in case a modifier (e.g. 
join) is following 3142 if table and isinstance(this, exp.Values) and this.alias: 3143 alias = this.args["alias"].pop() 3144 this = exp.Table(this=this, alias=alias) 3145 3146 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3147 3148 return this 3149 3150 def _parse_select( 3151 self, 3152 nested: bool = False, 3153 table: bool = False, 3154 parse_subquery_alias: bool = True, 3155 parse_set_operation: bool = True, 3156 ) -> t.Optional[exp.Expression]: 3157 cte = self._parse_with() 3158 3159 if cte: 3160 this = self._parse_statement() 3161 3162 if not this: 3163 self.raise_error("Failed to parse any statement following CTE") 3164 return cte 3165 3166 if "with" in this.arg_types: 3167 this.set("with", cte) 3168 else: 3169 self.raise_error(f"{this.key} does not support CTE") 3170 this = cte 3171 3172 return this 3173 3174 # duckdb supports leading with FROM x 3175 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3176 3177 if self._match(TokenType.SELECT): 3178 comments = self._prev_comments 3179 3180 hint = self._parse_hint() 3181 3182 if self._next and not self._next.token_type == TokenType.DOT: 3183 all_ = self._match(TokenType.ALL) 3184 distinct = self._match_set(self.DISTINCT_TOKENS) 3185 else: 3186 all_, distinct = None, None 3187 3188 kind = ( 3189 self._match(TokenType.ALIAS) 3190 and self._match_texts(("STRUCT", "VALUE")) 3191 and self._prev.text.upper() 3192 ) 3193 3194 if distinct: 3195 distinct = self.expression( 3196 exp.Distinct, 3197 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3198 ) 3199 3200 if all_ and distinct: 3201 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3202 3203 operation_modifiers = [] 3204 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3205 operation_modifiers.append(exp.var(self._prev.text.upper())) 3206 3207 limit = self._parse_limit(top=True) 3208 projections = self._parse_projections() 3209 3210 this = 
self.expression( 3211 exp.Select, 3212 kind=kind, 3213 hint=hint, 3214 distinct=distinct, 3215 expressions=projections, 3216 limit=limit, 3217 operation_modifiers=operation_modifiers or None, 3218 ) 3219 this.comments = comments 3220 3221 into = self._parse_into() 3222 if into: 3223 this.set("into", into) 3224 3225 if not from_: 3226 from_ = self._parse_from() 3227 3228 if from_: 3229 this.set("from", from_) 3230 3231 this = self._parse_query_modifiers(this) 3232 elif (table or nested) and self._match(TokenType.L_PAREN): 3233 this = self._parse_wrapped_select(table=table) 3234 3235 # We return early here so that the UNION isn't attached to the subquery by the 3236 # following call to _parse_set_operations, but instead becomes the parent node 3237 self._match_r_paren() 3238 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3239 elif self._match(TokenType.VALUES, advance=False): 3240 this = self._parse_derived_table_values() 3241 elif from_: 3242 this = exp.select("*").from_(from_.this, copy=False) 3243 elif self._match(TokenType.SUMMARIZE): 3244 table = self._match(TokenType.TABLE) 3245 this = self._parse_select() or self._parse_string() or self._parse_table() 3246 return self.expression(exp.Summarize, this=this, table=table) 3247 elif self._match(TokenType.DESCRIBE): 3248 this = self._parse_describe() 3249 elif self._match_text_seq("STREAM"): 3250 this = self._parse_function() 3251 if this: 3252 this = self.expression(exp.Stream, this=this) 3253 else: 3254 self._retreat(self._index - 1) 3255 else: 3256 this = None 3257 3258 return self._parse_set_operations(this) if parse_set_operation else this 3259 3260 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3261 self._match_text_seq("SEARCH") 3262 3263 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3264 3265 if not kind: 3266 return None 3267 3268 self._match_text_seq("FIRST", "BY") 3269 3270 return self.expression( 3271 
exp.RecursiveWithSearch, 3272 kind=kind, 3273 this=self._parse_id_var(), 3274 expression=self._match_text_seq("SET") and self._parse_id_var(), 3275 using=self._match_text_seq("USING") and self._parse_id_var(), 3276 ) 3277 3278 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3279 if not skip_with_token and not self._match(TokenType.WITH): 3280 return None 3281 3282 comments = self._prev_comments 3283 recursive = self._match(TokenType.RECURSIVE) 3284 3285 last_comments = None 3286 expressions = [] 3287 while True: 3288 cte = self._parse_cte() 3289 if isinstance(cte, exp.CTE): 3290 expressions.append(cte) 3291 if last_comments: 3292 cte.add_comments(last_comments) 3293 3294 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3295 break 3296 else: 3297 self._match(TokenType.WITH) 3298 3299 last_comments = self._prev_comments 3300 3301 return self.expression( 3302 exp.With, 3303 comments=comments, 3304 expressions=expressions, 3305 recursive=recursive, 3306 search=self._parse_recursive_with_search(), 3307 ) 3308 3309 def _parse_cte(self) -> t.Optional[exp.CTE]: 3310 index = self._index 3311 3312 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3313 if not alias or not alias.this: 3314 self.raise_error("Expected CTE to have alias") 3315 3316 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3317 self._retreat(index) 3318 return None 3319 3320 comments = self._prev_comments 3321 3322 if self._match_text_seq("NOT", "MATERIALIZED"): 3323 materialized = False 3324 elif self._match_text_seq("MATERIALIZED"): 3325 materialized = True 3326 else: 3327 materialized = None 3328 3329 cte = self.expression( 3330 exp.CTE, 3331 this=self._parse_wrapped(self._parse_statement), 3332 alias=alias, 3333 materialized=materialized, 3334 comments=comments, 3335 ) 3336 3337 if isinstance(cte.this, exp.Values): 3338 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3339 3340 return 
cte 3341 3342 def _parse_table_alias( 3343 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3344 ) -> t.Optional[exp.TableAlias]: 3345 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3346 # so this section tries to parse the clause version and if it fails, it treats the token 3347 # as an identifier (alias) 3348 if self._can_parse_limit_or_offset(): 3349 return None 3350 3351 any_token = self._match(TokenType.ALIAS) 3352 alias = ( 3353 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3354 or self._parse_string_as_identifier() 3355 ) 3356 3357 index = self._index 3358 if self._match(TokenType.L_PAREN): 3359 columns = self._parse_csv(self._parse_function_parameter) 3360 self._match_r_paren() if columns else self._retreat(index) 3361 else: 3362 columns = None 3363 3364 if not alias and not columns: 3365 return None 3366 3367 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3368 3369 # We bubble up comments from the Identifier to the TableAlias 3370 if isinstance(alias, exp.Identifier): 3371 table_alias.add_comments(alias.pop_comments()) 3372 3373 return table_alias 3374 3375 def _parse_subquery( 3376 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3377 ) -> t.Optional[exp.Subquery]: 3378 if not this: 3379 return None 3380 3381 return self.expression( 3382 exp.Subquery, 3383 this=this, 3384 pivots=self._parse_pivots(), 3385 alias=self._parse_table_alias() if parse_alias else None, 3386 sample=self._parse_table_sample(), 3387 ) 3388 3389 def _implicit_unnests_to_explicit(self, this: E) -> E: 3390 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3391 3392 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3393 for i, join in enumerate(this.args.get("joins") or []): 3394 table = join.this 3395 normalized_table = table.copy() 3396 normalized_table.meta["maybe_column"] = True 
3397 normalized_table = _norm(normalized_table, dialect=self.dialect) 3398 3399 if isinstance(table, exp.Table) and not join.args.get("on"): 3400 if normalized_table.parts[0].name in refs: 3401 table_as_column = table.to_column() 3402 unnest = exp.Unnest(expressions=[table_as_column]) 3403 3404 # Table.to_column creates a parent Alias node that we want to convert to 3405 # a TableAlias and attach to the Unnest, so it matches the parser's output 3406 if isinstance(table.args.get("alias"), exp.TableAlias): 3407 table_as_column.replace(table_as_column.this) 3408 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3409 3410 table.replace(unnest) 3411 3412 refs.add(normalized_table.alias_or_name) 3413 3414 return this 3415 3416 def _parse_query_modifiers( 3417 self, this: t.Optional[exp.Expression] 3418 ) -> t.Optional[exp.Expression]: 3419 if isinstance(this, self.MODIFIABLES): 3420 for join in self._parse_joins(): 3421 this.append("joins", join) 3422 for lateral in iter(self._parse_lateral, None): 3423 this.append("laterals", lateral) 3424 3425 while True: 3426 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3427 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3428 key, expression = parser(self) 3429 3430 if expression: 3431 this.set(key, expression) 3432 if key == "limit": 3433 offset = expression.args.pop("offset", None) 3434 3435 if offset: 3436 offset = exp.Offset(expression=offset) 3437 this.set("offset", offset) 3438 3439 limit_by_expressions = expression.expressions 3440 expression.set("expressions", None) 3441 offset.set("expressions", limit_by_expressions) 3442 continue 3443 break 3444 3445 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3446 this = self._implicit_unnests_to_explicit(this) 3447 3448 return this 3449 3450 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3451 start = self._curr 3452 while self._curr: 3453 self._advance() 3454 3455 end = self._tokens[self._index 
- 1] 3456 return exp.Hint(expressions=[self._find_sql(start, end)]) 3457 3458 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3459 return self._parse_function_call() 3460 3461 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3462 start_index = self._index 3463 should_fallback_to_string = False 3464 3465 hints = [] 3466 try: 3467 for hint in iter( 3468 lambda: self._parse_csv( 3469 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3470 ), 3471 [], 3472 ): 3473 hints.extend(hint) 3474 except ParseError: 3475 should_fallback_to_string = True 3476 3477 if should_fallback_to_string or self._curr: 3478 self._retreat(start_index) 3479 return self._parse_hint_fallback_to_string() 3480 3481 return self.expression(exp.Hint, expressions=hints) 3482 3483 def _parse_hint(self) -> t.Optional[exp.Hint]: 3484 if self._match(TokenType.HINT) and self._prev_comments: 3485 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3486 3487 return None 3488 3489 def _parse_into(self) -> t.Optional[exp.Into]: 3490 if not self._match(TokenType.INTO): 3491 return None 3492 3493 temp = self._match(TokenType.TEMPORARY) 3494 unlogged = self._match_text_seq("UNLOGGED") 3495 self._match(TokenType.TABLE) 3496 3497 return self.expression( 3498 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3499 ) 3500 3501 def _parse_from( 3502 self, joins: bool = False, skip_from_token: bool = False 3503 ) -> t.Optional[exp.From]: 3504 if not skip_from_token and not self._match(TokenType.FROM): 3505 return None 3506 3507 return self.expression( 3508 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3509 ) 3510 3511 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3512 return self.expression( 3513 exp.MatchRecognizeMeasure, 3514 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3515 this=self._parse_expression(), 3516 ) 3517 3518 
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a MATCH_RECOGNIZE clause into an `exp.MatchRecognize`.

    The sub-clauses are read in a fixed order: PARTITION BY, ORDER BY,
    MEASURES, the rows-per-match option, AFTER MATCH SKIP, PATTERN, DEFINE,
    and finally a table alias after the closing parenthesis. Returns None when
    the MATCH_RECOGNIZE token is absent.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition_by = self._parse_partition_by()
    order_by = self._parse_order()

    measures = None
    if self._match_text_seq("MEASURES"):
        measures = self._parse_csv(self._parse_match_recognize_measure)

    rows = None
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        rows_text = "ALL ROWS PER MATCH"
        # At most one of these suffixes is consumed, tried in this order.
        for words in (
            ("SHOW", "EMPTY", "MATCHES"),
            ("OMIT", "EMPTY", "MATCHES"),
            ("WITH", "UNMATCHED", "ROWS"),
        ):
            if self._match_text_seq(*words):
                rows_text += " " + " ".join(words)
                break
        rows = exp.var(rows_text)

    after = None
    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        skip_text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            skip_text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            skip_text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            skip_text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            skip_text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(skip_text)

    pattern = None
    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern is kept as raw SQL: walk tokens until the parenthesis
        # opened above is balanced, remembering the last token before it closes.
        depth = 1
        first_token = self._curr

        while self._curr and depth > 0:
            token_type = self._curr.token_type
            if token_type == TokenType.L_PAREN:
                depth += 1
            if token_type == TokenType.R_PAREN:
                depth -= 1

            last_token = self._prev
            self._advance()

        if depth > 0:
            self.raise_error("Expecting )", self._curr)

        pattern = exp.var(self._find_sql(first_token, last_token))

    define = None
    if self._match_text_seq("DEFINE"):
        define = self._parse_csv(self._parse_name_as_expression)

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize,
        partition_by=partition_by,
        order=order_by,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=self._parse_table_alias(),
    )
def _parse_lateral(self) -> t.Optional[exp.Lateral]:
    """Parse `CROSS APPLY`, `OUTER APPLY` or `LATERAL [VIEW] [OUTER]` into `exp.Lateral`.

    `cross_apply` on the result is the truthy match for CROSS APPLY, False for
    OUTER APPLY, and otherwise whatever the failed CROSS APPLY match returned.
    Returns None when none of the three introducers is present.
    """
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
    if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
        cross_apply = False

    is_apply = cross_apply is not None
    if not is_apply and not self._match(TokenType.LATERAL):
        return None

    source = self._parse_select(table=True)
    # VIEW / OUTER are only looked for after LATERAL, never after APPLY.
    view = None if is_apply else self._match(TokenType.VIEW)
    outer = None if is_apply else self._match(TokenType.OUTER)

    if not source:
        source = (
            self._parse_unnest()
            or self._parse_function()
            or self._parse_id_var(any_token=False)
        )

        while self._match(TokenType.DOT):
            source = exp.Dot(
                this=source,
                expression=self._parse_function() or self._parse_id_var(any_token=False),
            )

    ordinality: t.Optional[bool] = None

    if view:
        view_table = self._parse_id_var(any_token=False)
        view_columns = (
            self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
        )
        table_alias: t.Optional[exp.TableAlias] = self.expression(
            exp.TableAlias, this=view_table, columns=view_columns
        )
    elif isinstance(source, (exp.Subquery, exp.Unnest)) and source.alias:
        # We move the alias from the lateral's child node to the lateral itself
        table_alias = source.args["alias"].pop()
    else:
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        table_alias = self._parse_table_alias()

    return self.expression(
        exp.Lateral,
        this=source,
        view=view,
        outer=outer,
        alias=table_alias,
        cross_apply=cross_apply,
        ordinality=ordinality,
    )
3650 return self.expression( 3651 exp.Lateral, 3652 this=this, 3653 view=view, 3654 outer=outer, 3655 alias=table_alias, 3656 cross_apply=cross_apply, 3657 ordinality=ordinality, 3658 ) 3659 3660 def _parse_join_parts( 3661 self, 3662 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3663 return ( 3664 self._match_set(self.JOIN_METHODS) and self._prev, 3665 self._match_set(self.JOIN_SIDES) and self._prev, 3666 self._match_set(self.JOIN_KINDS) and self._prev, 3667 ) 3668 3669 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3670 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3671 this = self._parse_column() 3672 if isinstance(this, exp.Column): 3673 return this.this 3674 return this 3675 3676 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3677 3678 def _parse_join( 3679 self, skip_join_token: bool = False, parse_bracket: bool = False 3680 ) -> t.Optional[exp.Join]: 3681 if self._match(TokenType.COMMA): 3682 table = self._try_parse(self._parse_table) 3683 if table: 3684 return self.expression(exp.Join, this=table) 3685 return None 3686 3687 index = self._index 3688 method, side, kind = self._parse_join_parts() 3689 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3690 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3691 3692 if not skip_join_token and not join: 3693 self._retreat(index) 3694 kind = None 3695 method = None 3696 side = None 3697 3698 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3699 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3700 3701 if not skip_join_token and not join and not outer_apply and not cross_apply: 3702 return None 3703 3704 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3705 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3706 kwargs["expressions"] = self._parse_csv( 
3707 lambda: self._parse_table(parse_bracket=parse_bracket) 3708 ) 3709 3710 if method: 3711 kwargs["method"] = method.text 3712 if side: 3713 kwargs["side"] = side.text 3714 if kind: 3715 kwargs["kind"] = kind.text 3716 if hint: 3717 kwargs["hint"] = hint 3718 3719 if self._match(TokenType.MATCH_CONDITION): 3720 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3721 3722 if self._match(TokenType.ON): 3723 kwargs["on"] = self._parse_assignment() 3724 elif self._match(TokenType.USING): 3725 kwargs["using"] = self._parse_using_identifiers() 3726 elif ( 3727 not (outer_apply or cross_apply) 3728 and not isinstance(kwargs["this"], exp.Unnest) 3729 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3730 ): 3731 index = self._index 3732 joins: t.Optional[list] = list(self._parse_joins()) 3733 3734 if joins and self._match(TokenType.ON): 3735 kwargs["on"] = self._parse_assignment() 3736 elif joins and self._match(TokenType.USING): 3737 kwargs["using"] = self._parse_using_identifiers() 3738 else: 3739 joins = None 3740 self._retreat(index) 3741 3742 kwargs["this"].set("joins", joins if joins else None) 3743 3744 kwargs["pivots"] = self._parse_pivots() 3745 3746 comments = [c for token in (method, side, kind) if token for c in token.comments] 3747 return self.expression(exp.Join, comments=comments, **kwargs) 3748 3749 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3750 this = self._parse_assignment() 3751 3752 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3753 return this 3754 3755 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3756 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3757 3758 return this 3759 3760 def _parse_index_params(self) -> exp.IndexParameters: 3761 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3762 3763 if self._match(TokenType.L_PAREN, advance=False): 3764 columns = 
def _parse_index(
    self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
) -> t.Optional[exp.Index]:
    """Parse an index definition into an `exp.Index`.

    Args:
        index: an already-parsed index name. When given (or when `anonymous`
            is set) only `[ON] [TABLE] <table>` and the index parameters are read.
        anonymous: the index has no name.

    Returns:
        The parsed index, or None when an INDEX token was required but missing.
    """
    unique = primary = amp = None
    table = None

    if index or anonymous:
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive
        table = self._parse_table_parts(schema=True)
    else:
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")

        if not self._match(TokenType.INDEX):
            return None

        index = self._parse_id_var()

    params = self._parse_index_params()

    return self.expression(
        exp.Index,
        this=index,
        table=table,
        unique=unique,
        primary=primary,
        amp=amp,
        params=params,
    )


def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
    """Parse table hints: either one `WITH (...)` hint or a run of index hints.

    Returns the list of hints, or None when nothing was parsed.
    """
    if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
        # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
        with_hint = self.expression(
            exp.WithTableHint,
            expressions=self._parse_csv(
                lambda: self._parse_function() or self._parse_var(any_token=True)
            ),
        )
        self._match_r_paren()
        return [with_hint]

    # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
    index_hints: t.List[exp.Expression] = []
    while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
        index_hint = exp.IndexTableHint(this=self._prev.text.upper())

        self._match_set((TokenType.INDEX, TokenType.KEY))
        if self._match(TokenType.FOR):
            index_hint.set("target", self._advance_any() and self._prev.text.upper())

        index_hint.set("expressions", self._parse_wrapped_id_vars())
        index_hints.append(index_hint)

    return index_hints or None
expressions=self._parse_csv( 3834 lambda: self._parse_function() or self._parse_var(any_token=True) 3835 ), 3836 ) 3837 ) 3838 self._match_r_paren() 3839 else: 3840 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3841 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3842 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3843 3844 self._match_set((TokenType.INDEX, TokenType.KEY)) 3845 if self._match(TokenType.FOR): 3846 hint.set("target", self._advance_any() and self._prev.text.upper()) 3847 3848 hint.set("expressions", self._parse_wrapped_id_vars()) 3849 hints.append(hint) 3850 3851 return hints or None 3852 3853 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3854 return ( 3855 (not schema and self._parse_function(optional_parens=False)) 3856 or self._parse_id_var(any_token=False) 3857 or self._parse_string_as_identifier() 3858 or self._parse_placeholder() 3859 ) 3860 3861 def _parse_table_parts( 3862 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3863 ) -> exp.Table: 3864 catalog = None 3865 db = None 3866 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3867 3868 while self._match(TokenType.DOT): 3869 if catalog: 3870 # This allows nesting the table in arbitrarily many dot expressions if needed 3871 table = self.expression( 3872 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3873 ) 3874 else: 3875 catalog = db 3876 db = table 3877 # "" used for tsql FROM a..b case 3878 table = self._parse_table_part(schema=schema) or "" 3879 3880 if ( 3881 wildcard 3882 and self._is_connected() 3883 and (isinstance(table, exp.Identifier) or not table) 3884 and self._match(TokenType.STAR) 3885 ): 3886 if isinstance(table, exp.Identifier): 3887 table.args["this"] += "*" 3888 else: 3889 table = exp.Identifier(this="*") 3890 3891 # We bubble up comments from the Identifier to the Table 3892 comments = table.pop_comments() if isinstance(table, 
def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    parse_bracket: bool = False,
    is_db_reference: bool = False,
    parse_partition: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a table-like source and the modifiers that may follow it.

    Lateral, UNNEST, derived VALUES and subquery sources are returned as soon
    as one of them parses. Otherwise a bracket expression, `ROWS FROM (...)`
    or a dotted table name is parsed, followed by the optional partition,
    version, sample, alias, hints, pivots, joins and WITH ORDINALITY parts.

    Args:
        schema: parse the table as a schema target; the result is then handed
            to `_parse_schema` right after the partition clause.
        joins: also parse trailing joins and attach them to the table.
        alias_tokens: token types allowed as alias; defaults to `TABLE_ALIAS_TOKENS`.
        parse_bracket: try a bracket expression first.
        is_db_reference: forwarded to `_parse_table_parts`.
        parse_partition: parse a PARTITION clause even if the parser's
            `SUPPORTS_PARTITION_SELECTION` is off.
    """
    for parse_source in (
        self._parse_lateral,
        self._parse_unnest,
        self._parse_derived_table_values,
    ):
        source = parse_source()
        if source:
            return source

    subquery = self._parse_select(table=True)
    if subquery:
        if not subquery.args.get("pivots"):
            subquery.set("pivots", self._parse_pivots())
        return subquery

    bracket = parse_bracket and self._parse_bracket(None)
    bracket = self.expression(exp.Table, this=bracket) if bracket else None

    rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
        self._parse_table
    )
    rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

    only = self._match(TokenType.ONLY)

    this = t.cast(
        exp.Expression,
        bracket
        or rows_from
        or self._parse_bracket(
            self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
        ),
    )

    if only:
        this.set("only", only)

    # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
    self._match_text_seq("*")

    wants_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
    if wants_partition and self._match(TokenType.PARTITION, advance=False):
        this.set("partition", self._parse_partition())

    if schema:
        return self._parse_schema(this=this)

    version = self._parse_version()
    if version:
        this.set("version", version)

    sample_before_alias = self.dialect.ALIAS_POST_TABLESAMPLE
    if sample_before_alias:
        this.set("sample", self._parse_table_sample())

    alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
    if alias:
        this.set("alias", alias)

    if isinstance(this, exp.Table) and self._match_text_seq("AT"):
        return self.expression(
            exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
        )

    this.set("hints", self._parse_table_hints())

    if not this.args.get("pivots"):
        this.set("pivots", self._parse_pivots())

    if not self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    if joins:
        for join in self._parse_joins():
            this.append("joins", join)

    if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
        this.set("ordinality", True)
        this.set("alias", self._parse_table_alias())

    return this
def _parse_version(self) -> t.Optional[exp.Version]:
    """Parse a TIMESTAMP / VERSION snapshot clause into an `exp.Version`.

    After the snapshot token the supported forms are `FROM|BETWEEN a TO|AND b`,
    `CONTAINED IN (...)`, `ALL`, and (the default) an optional `AS OF` followed
    by a type expression. Returns None when neither snapshot token is present.
    """
    if self._match(TokenType.TIMESTAMP_SNAPSHOT):
        this = "TIMESTAMP"
    elif self._match(TokenType.VERSION_SNAPSHOT):
        this = "VERSION"
    else:
        return None

    expression: t.Optional[exp.Expression]

    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        kind = self._prev.text.upper()
        lower = self._parse_bitwise()
        self._match_texts(("TO", "AND"))
        upper = self._parse_bitwise()
        expression = self.expression(exp.Tuple, expressions=[lower, upper])
    elif self._match_text_seq("CONTAINED", "IN"):
        kind = "CONTAINED IN"
        expression = self.expression(
            exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
        )
    elif self._match(TokenType.ALL):
        kind = "ALL"
        expression = None
    else:
        # "AS OF" is consumed when present; the kind is "AS OF" either way.
        self._match_text_seq("AS", "OF")
        kind = "AS OF"
        expression = self._parse_type()

    return self.expression(exp.Version, this=this, expression=expression, kind=kind)
4032 end = self._parse_bitwise() 4033 expression: t.Optional[exp.Expression] = self.expression( 4034 exp.Tuple, expressions=[start, end] 4035 ) 4036 elif self._match_text_seq("CONTAINED", "IN"): 4037 kind = "CONTAINED IN" 4038 expression = self.expression( 4039 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4040 ) 4041 elif self._match(TokenType.ALL): 4042 kind = "ALL" 4043 expression = None 4044 else: 4045 self._match_text_seq("AS", "OF") 4046 kind = "AS OF" 4047 expression = self._parse_type() 4048 4049 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4050 4051 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4052 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4053 index = self._index 4054 historical_data = None 4055 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4056 this = self._prev.text.upper() 4057 kind = ( 4058 self._match(TokenType.L_PAREN) 4059 and self._match_texts(self.HISTORICAL_DATA_KIND) 4060 and self._prev.text.upper() 4061 ) 4062 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4063 4064 if expression: 4065 self._match_r_paren() 4066 historical_data = self.expression( 4067 exp.HistoricalData, this=this, kind=kind, expression=expression 4068 ) 4069 else: 4070 self._retreat(index) 4071 4072 return historical_data 4073 4074 def _parse_changes(self) -> t.Optional[exp.Changes]: 4075 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4076 return None 4077 4078 information = self._parse_var(any_token=True) 4079 self._match_r_paren() 4080 4081 return self.expression( 4082 exp.Changes, 4083 information=information, 4084 at_before=self._parse_historical_data(), 4085 end=self._parse_historical_data(), 4086 ) 4087 4088 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4089 if not self._match(TokenType.UNNEST): 4090 return None 4091 4092 expressions = self._parse_wrapped_csv(self._parse_equality) 4093 
def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
    """Parse `VALUES ...`, `(VALUES ...)` or `FORMAT VALUES ...` into an `exp.Values`.

    An alias is looked for right after the value list and, if none was found
    there, again after the closing parenthesis of the derived form. Returns
    None when none of the introducers is present.
    """
    is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
    if not is_derived and not (
        # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
        self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
    ):
        return None

    expressions = self._parse_csv(self._parse_value)
    alias = self._parse_table_alias()

    if is_derived:
        self._match_r_paren()

    return self.expression(
        exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
    )


def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a table sample clause into an `exp.TableSample`.

    Args:
        as_modifier: also accept `USING SAMPLE` as the introducer.

    Returns:
        The parsed sample, or None when no introducer is present. The sampled
        amount is stored as `expressions` when `TABLESAMPLE_CSV` is set, as
        bucket arguments for the BUCKET form, and otherwise as `percent` or
        `size` depending on the unit that follows and on the dialect's
        `TABLESAMPLE_SIZE_IS_PERCENT`.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        size = num
    else:
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    if self._match(TokenType.L_PAREN):
        # A second parenthesized group carries the method and an optional seed.
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample,
        expressions=expressions,
        method=method,
        bucket_numerator=bucket_numerator,
        bucket_denominator=bucket_denominator,
        bucket_field=bucket_field,
        percent=percent,
        size=size,
        seed=seed,
    )
self.TABLESAMPLE_CSV: 4152 num = None 4153 expressions = self._parse_csv(self._parse_primary) 4154 else: 4155 expressions = None 4156 num = ( 4157 self._parse_factor() 4158 if self._match(TokenType.NUMBER, advance=False) 4159 else self._parse_primary() or self._parse_placeholder() 4160 ) 4161 4162 if self._match_text_seq("BUCKET"): 4163 bucket_numerator = self._parse_number() 4164 self._match_text_seq("OUT", "OF") 4165 bucket_denominator = bucket_denominator = self._parse_number() 4166 self._match(TokenType.ON) 4167 bucket_field = self._parse_field() 4168 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4169 percent = num 4170 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4171 size = num 4172 else: 4173 percent = num 4174 4175 if matched_l_paren: 4176 self._match_r_paren() 4177 4178 if self._match(TokenType.L_PAREN): 4179 method = self._parse_var(upper=True) 4180 seed = self._match(TokenType.COMMA) and self._parse_number() 4181 self._match_r_paren() 4182 elif self._match_texts(("SEED", "REPEATABLE")): 4183 seed = self._parse_wrapped(self._parse_number) 4184 4185 if not method and self.DEFAULT_SAMPLING_METHOD: 4186 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4187 4188 return self.expression( 4189 exp.TableSample, 4190 expressions=expressions, 4191 method=method, 4192 bucket_numerator=bucket_numerator, 4193 bucket_denominator=bucket_denominator, 4194 bucket_field=bucket_field, 4195 percent=percent, 4196 size=size, 4197 seed=seed, 4198 ) 4199 4200 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4201 return list(iter(self._parse_pivot, None)) or None 4202 4203 def _parse_joins(self) -> t.Iterator[exp.Join]: 4204 return iter(self._parse_join, None) 4205 4206 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4207 if not self._match(TokenType.INTO): 4208 return None 4209 4210 return self.expression( 4211 exp.UnpivotColumns, 4212 this=self._match_text_seq("NAME") and self._parse_column(), 4213 
# https://duckdb.org/docs/sql/statements/pivot
def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
    """Parse the statement form `<table> [ON ...] [INTO ...] [USING ...] [GROUP BY ...]`.

    Args:
        is_unpivot: stored as the `unpivot` arg of the resulting `exp.Pivot`.
    """

    def _parse_on_entry() -> t.Optional[exp.Expression]:
        operand = self._parse_bitwise()

        if self._match(TokenType.IN):
            # PIVOT ... ON col IN (row_val1, row_val2)
            return self._parse_in(operand)
        if self._match(TokenType.ALIAS, advance=False):
            # UNPIVOT ... ON (col1, col2, col3) AS row_val
            return self._parse_alias(operand)

        return operand

    source = self._parse_table()
    on_expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on_entry)
    into = self._parse_unpivot_columns()
    using = self._match(TokenType.USING) and self._parse_csv(
        lambda: self._parse_alias(self._parse_function())
    )
    group = self._parse_group()

    return self.expression(
        exp.Pivot,
        this=source,
        expressions=on_expressions,
        using=using,
        group=group,
        unpivot=is_unpivot,
        into=into,
    )


def _parse_pivot_in(self) -> exp.In:
    """Parse `<column> IN (...)` inside a pivot's FOR clause.

    The parenthesized list is either `ANY [ORDER BY ...]` or comma-separated
    values, each optionally followed by an alias.
    """

    def _parse_value_with_alias() -> t.Optional[exp.Expression]:
        value_expr = self._parse_select_or_expression()

        self._match(TokenType.ALIAS)
        alias = self._parse_bitwise()
        if not alias:
            return value_expr

        if isinstance(alias, exp.Column) and not alias.db:
            alias = alias.this
        return self.expression(exp.PivotAlias, this=value_expr, alias=alias)

    value = self._parse_column()

    if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
        self.raise_error("Expecting IN (")

    if self._match(TokenType.ANY):
        exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
    else:
        exprs = self._parse_csv(_parse_value_with_alias)

    self._match_r_paren()
    return self.expression(exp.In, this=value, expressions=exprs)


def _parse_pivot(self) -> t.Optional[exp.Pivot]:
    """Parse a `PIVOT (...)` / `UNPIVOT (...)` clause into an `exp.Pivot`.

    Returns None when neither keyword is present, or (after rewinding) when
    the keyword is not followed by an opening parenthesis. For PIVOT, the
    names of the output columns are additionally derived from the IN-lists
    and aggregation aliases and stored as `columns`.
    """
    start_index = self._index
    include_nulls = None

    if self._match(TokenType.PIVOT):
        unpivot = False
    elif self._match(TokenType.UNPIVOT):
        unpivot = True

        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
        if self._match_text_seq("INCLUDE", "NULLS"):
            include_nulls = True
        elif self._match_text_seq("EXCLUDE", "NULLS"):
            include_nulls = False
    else:
        return None

    if not self._match(TokenType.L_PAREN):
        self._retreat(start_index)
        return None

    if unpivot:
        expressions = self._parse_csv(self._parse_column)
    else:
        expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

    if not expressions:
        self.raise_error("Failed to parse PIVOT's aggregation list")

    if not self._match(TokenType.FOR):
        self.raise_error("Expecting FOR")

    fields = []
    field = self._try_parse(self._parse_pivot_in)
    while field:
        fields.append(field)
        field = self._try_parse(self._parse_pivot_in)

    default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
        self._parse_bitwise
    )

    group = self._parse_group()

    self._match_r_paren()

    pivot = self.expression(
        exp.Pivot,
        expressions=expressions,
        fields=fields,
        unpivot=unpivot,
        include_nulls=include_nulls,
        default_on_null=default_on_null,
        group=group,
    )

    # An alias is only looked for when no further PIVOT / UNPIVOT follows.
    if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
        pivot.set("alias", self._parse_table_alias())

    if not unpivot:
        names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

        columns: t.List[exp.Expression] = []
        all_fields = []
        for pivot_field in pivot.fields:
            field_values = pivot_field.expressions

            # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
            if isinstance(seq_get(field_values, 0), exp.PivotAny):
                continue

            all_fields.append(
                [
                    fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                    for fld in field_values
                ]
            )

        if all_fields:
            if names:
                all_fields.append(names)

            # Generate all possible combinations of the pivot columns
            # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
            # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
            for combination in itertools.product(*all_fields):
                name_parts = list(combination)

                if names and self.PREFIXED_PIVOT_COLUMNS:
                    # Move the "name" to the front of the list
                    name_parts.insert(0, name_parts.pop(-1))

                columns.append(exp.to_identifier("_".join(name_parts)))

        pivot.set("columns", columns)

    return pivot
ORDER BY <column>`; we can't infer in this case. 4345 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4346 continue 4347 4348 all_fields.append( 4349 [ 4350 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4351 for fld in pivot_field_expressions 4352 ] 4353 ) 4354 4355 if all_fields: 4356 if names: 4357 all_fields.append(names) 4358 4359 # Generate all possible combinations of the pivot columns 4360 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4361 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4362 for fld_parts_tuple in itertools.product(*all_fields): 4363 fld_parts = list(fld_parts_tuple) 4364 4365 if names and self.PREFIXED_PIVOT_COLUMNS: 4366 # Move the "name" to the front of the list 4367 fld_parts.insert(0, fld_parts.pop(-1)) 4368 4369 columns.append(exp.to_identifier("_".join(fld_parts))) 4370 4371 pivot.set("columns", columns) 4372 4373 return pivot 4374 4375 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4376 return [agg.alias for agg in aggregations if agg.alias] 4377 4378 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4379 if not skip_where_token and not self._match(TokenType.PREWHERE): 4380 return None 4381 4382 return self.expression( 4383 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4384 ) 4385 4386 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4387 if not skip_where_token and not self._match(TokenType.WHERE): 4388 return None 4389 4390 return self.expression( 4391 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4392 ) 4393 4394 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4395 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4396 return None 4397 4398 elements: t.Dict[str, t.Any] = defaultdict(list) 4399 4400 if 
self._match(TokenType.ALL): 4401 elements["all"] = True 4402 elif self._match(TokenType.DISTINCT): 4403 elements["all"] = False 4404 4405 while True: 4406 index = self._index 4407 4408 elements["expressions"].extend( 4409 self._parse_csv( 4410 lambda: None 4411 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4412 else self._parse_assignment() 4413 ) 4414 ) 4415 4416 before_with_index = self._index 4417 with_prefix = self._match(TokenType.WITH) 4418 4419 if self._match(TokenType.ROLLUP): 4420 elements["rollup"].append( 4421 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4422 ) 4423 elif self._match(TokenType.CUBE): 4424 elements["cube"].append( 4425 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4426 ) 4427 elif self._match(TokenType.GROUPING_SETS): 4428 elements["grouping_sets"].append( 4429 self.expression( 4430 exp.GroupingSets, 4431 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4432 ) 4433 ) 4434 elif self._match_text_seq("TOTALS"): 4435 elements["totals"] = True # type: ignore 4436 4437 if before_with_index <= self._index <= before_with_index + 1: 4438 self._retreat(before_with_index) 4439 break 4440 4441 if index == self._index: 4442 break 4443 4444 return self.expression(exp.Group, **elements) # type: ignore 4445 4446 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4447 return self.expression( 4448 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4449 ) 4450 4451 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4452 if self._match(TokenType.L_PAREN): 4453 grouping_set = self._parse_csv(self._parse_column) 4454 self._match_r_paren() 4455 return self.expression(exp.Tuple, expressions=grouping_set) 4456 4457 return self._parse_column() 4458 4459 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4460 if not skip_having_token and not self._match(TokenType.HAVING): 4461 return None 
4462 return self.expression(exp.Having, this=self._parse_assignment()) 4463 4464 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4465 if not self._match(TokenType.QUALIFY): 4466 return None 4467 return self.expression(exp.Qualify, this=self._parse_assignment()) 4468 4469 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4470 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4471 exp.Prior, this=self._parse_bitwise() 4472 ) 4473 connect = self._parse_assignment() 4474 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4475 return connect 4476 4477 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4478 if skip_start_token: 4479 start = None 4480 elif self._match(TokenType.START_WITH): 4481 start = self._parse_assignment() 4482 else: 4483 return None 4484 4485 self._match(TokenType.CONNECT_BY) 4486 nocycle = self._match_text_seq("NOCYCLE") 4487 connect = self._parse_connect_with_prior() 4488 4489 if not start and self._match(TokenType.START_WITH): 4490 start = self._parse_assignment() 4491 4492 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4493 4494 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4495 this = self._parse_id_var(any_token=True) 4496 if self._match(TokenType.ALIAS): 4497 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4498 return this 4499 4500 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4501 if self._match_text_seq("INTERPOLATE"): 4502 return self._parse_wrapped_csv(self._parse_name_as_expression) 4503 return None 4504 4505 def _parse_order( 4506 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4507 ) -> t.Optional[exp.Expression]: 4508 siblings = None 4509 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4510 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4511 return this 4512 4513 siblings = True 4514 4515 return 
self.expression( 4516 exp.Order, 4517 this=this, 4518 expressions=self._parse_csv(self._parse_ordered), 4519 siblings=siblings, 4520 ) 4521 4522 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4523 if not self._match(token): 4524 return None 4525 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4526 4527 def _parse_ordered( 4528 self, parse_method: t.Optional[t.Callable] = None 4529 ) -> t.Optional[exp.Ordered]: 4530 this = parse_method() if parse_method else self._parse_assignment() 4531 if not this: 4532 return None 4533 4534 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4535 this = exp.var("ALL") 4536 4537 asc = self._match(TokenType.ASC) 4538 desc = self._match(TokenType.DESC) or (asc and False) 4539 4540 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4541 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4542 4543 nulls_first = is_nulls_first or False 4544 explicitly_null_ordered = is_nulls_first or is_nulls_last 4545 4546 if ( 4547 not explicitly_null_ordered 4548 and ( 4549 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4550 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4551 ) 4552 and self.dialect.NULL_ORDERING != "nulls_are_last" 4553 ): 4554 nulls_first = True 4555 4556 if self._match_text_seq("WITH", "FILL"): 4557 with_fill = self.expression( 4558 exp.WithFill, 4559 **{ # type: ignore 4560 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4561 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4562 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4563 "interpolate": self._parse_interpolate(), 4564 }, 4565 ) 4566 else: 4567 with_fill = None 4568 4569 return self.expression( 4570 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4571 ) 4572 4573 def _parse_limit_options(self) -> exp.LimitOptions: 4574 percent = self._match(TokenType.PERCENT) 4575 rows = 
def _parse_limit(
    self,
    this: t.Optional[exp.Expression] = None,
    top: bool = False,
    skip_limit_token: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a LIMIT / TOP clause, or failing that a FETCH clause.

    Args:
        this: Expression the clause is attached to; returned unchanged if neither
            clause is present.
        top: Match the TOP token instead of LIMIT.
        skip_limit_token: Treat the introducing token as already consumed.

    Returns:
        An `exp.Limit` or `exp.Fetch` node, or `this` when nothing matched.
    """
    limit_token = TokenType.TOP if top else TokenType.LIMIT

    if not (skip_limit_token or self._match(limit_token)):
        if not self._match(TokenType.FETCH):
            return this

        # FETCH defaults to FIRST when no explicit FIRST/NEXT keyword is given
        has_direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
        direction = self._prev.text.upper() if has_direction else "FIRST"
        count = self._parse_field(tokens=self.FETCH_TOKENS)
        fetch_options = self._parse_limit_options()

        return self.expression(
            exp.Fetch, direction=direction, count=count, limit_options=fetch_options
        )

    comments = self._prev_comments
    limit_options = None

    if top:
        # TOP accepts either a parenthesized term or a bare number
        has_paren = self._match(TokenType.L_PAREN)
        if has_paren:
            expression = self._parse_term()
            self._match_r_paren()
        else:
            expression = self._parse_number()

        limit_options = self._parse_limit_options()
    else:
        expression = self._parse_term()

    offset = None
    if self._match(TokenType.COMMA):
        # `LIMIT a, b`: the first value is the offset, the second the row count
        offset, expression = expression, self._parse_term()

    limit_by = self._parse_limit_by()

    return self.expression(
        exp.Limit,
        this=this,
        expression=expression,
        offset=offset,
        comments=comments,
        limit_options=limit_options,
        expressions=limit_by,
    )
expressions=self._parse_limit_by() 4642 ) 4643 4644 def _can_parse_limit_or_offset(self) -> bool: 4645 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4646 return False 4647 4648 index = self._index 4649 result = bool( 4650 self._try_parse(self._parse_limit, retreat=True) 4651 or self._try_parse(self._parse_offset, retreat=True) 4652 ) 4653 self._retreat(index) 4654 return result 4655 4656 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4657 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4658 4659 def _parse_locks(self) -> t.List[exp.Lock]: 4660 locks = [] 4661 while True: 4662 if self._match_text_seq("FOR", "UPDATE"): 4663 update = True 4664 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4665 "LOCK", "IN", "SHARE", "MODE" 4666 ): 4667 update = False 4668 else: 4669 break 4670 4671 expressions = None 4672 if self._match_text_seq("OF"): 4673 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4674 4675 wait: t.Optional[bool | exp.Expression] = None 4676 if self._match_text_seq("NOWAIT"): 4677 wait = True 4678 elif self._match_text_seq("WAIT"): 4679 wait = self._parse_primary() 4680 elif self._match_text_seq("SKIP", "LOCKED"): 4681 wait = False 4682 4683 locks.append( 4684 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4685 ) 4686 4687 return locks 4688 4689 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4690 start = self._index 4691 _, side_token, kind_token = self._parse_join_parts() 4692 4693 side = side_token.text if side_token else None 4694 kind = kind_token.text if kind_token else None 4695 4696 if not self._match_set(self.SET_OPERATIONS): 4697 self._retreat(start) 4698 return None 4699 4700 token_type = self._prev.token_type 4701 4702 if token_type == TokenType.UNION: 4703 operation: t.Type[exp.SetOperation] = exp.Union 4704 elif token_type == TokenType.EXCEPT: 4705 operation = 
def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Fold any chain of set operations that follows `this` into a single expression.

    When `MODIFIERS_ATTACHED_TO_SET_OP` is set, the modifiers named in
    `SET_OP_MODIFIERS` are moved from the right-hand operand onto the resulting
    set operation.

    Returns:
        The outermost set operation, or `this` if no set operation followed.
    """
    combined = self.parse_set_operation(this) if this else None
    while combined:
        this = combined
        combined = self.parse_set_operation(this)

    if not (isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP):
        return this

    right_operand = this.expression
    if not right_operand:
        return this

    for modifier in self.SET_OP_MODIFIERS:
        node = right_operand.args.get(modifier)
        if node:
            # pop() detaches the modifier from the operand before re-attaching it
            this.set(modifier, node.pop())

    return this
this = exp.column( 4772 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4773 ) 4774 4775 while self._match_set(self.ASSIGNMENT): 4776 if isinstance(this, exp.Column) and len(this.parts) == 1: 4777 this = this.this 4778 4779 this = self.expression( 4780 self.ASSIGNMENT[self._prev.token_type], 4781 this=this, 4782 comments=self._prev_comments, 4783 expression=self._parse_assignment(), 4784 ) 4785 4786 return this 4787 4788 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4789 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4790 4791 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4792 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4793 4794 def _parse_equality(self) -> t.Optional[exp.Expression]: 4795 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4796 4797 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4798 return self._parse_tokens(self._parse_range, self.COMPARISON) 4799 4800 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4801 this = this or self._parse_bitwise() 4802 negate = self._match(TokenType.NOT) 4803 4804 if self._match_set(self.RANGE_PARSERS): 4805 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4806 if not expression: 4807 return this 4808 4809 this = expression 4810 elif self._match(TokenType.ISNULL): 4811 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4812 4813 # Postgres supports ISNULL and NOTNULL for conditions. 
def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse what follows an already-consumed IS token.

    Handles `IS [NOT] DISTINCT FROM`, `IS [NOT] JSON ...` and `IS [NOT] <primary | null>`.

    Returns:
        The resulting expression, or None (after retreating to the IS token) when no
        right-hand side could be parsed.
    """
    # Position of the IS token itself, so a failed parse can un-consume it
    is_token_index = self._index - 1
    negated = self._match(TokenType.NOT)

    if self._match_text_seq("DISTINCT", "FROM"):
        # IS NOT DISTINCT FROM is the null-safe equality; IS DISTINCT FROM its negation
        if negated:
            null_safe_cls = exp.NullSafeEQ
        else:
            null_safe_cls = exp.NullSafeNEQ
        return self.expression(null_safe_cls, this=this, expression=self._parse_bitwise())

    if self._match(TokenType.JSON):
        kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

        with_keys = None
        if self._match_text_seq("WITH"):
            with_keys = True
        elif self._match_text_seq("WITHOUT"):
            with_keys = False

        unique = self._match(TokenType.UNIQUE)
        self._match_text_seq("KEYS")

        json_args = {"this": kind, "with": with_keys, "unique": unique}
        right = self.expression(exp.JSON, **json_args)
    else:
        right = self._parse_primary()
        if not right:
            right = self._parse_null()
        if not right:
            self._retreat(is_token_index)
            return None

    is_expr = self.expression(exp.Is, this=this, expression=right)
    if negated:
        return self.expression(exp.Not, this=is_expr)
    return is_expr
def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
    """Parse an INTERVAL expression, or a sum of consecutive interval parts.

    Args:
        match_interval: When True, the INTERVAL token is required and None is returned
            if it is absent. When False, parsing continues without it (used by the
            recursive call below for the follow-up parts of a sum).

    Returns:
        An `exp.Interval`, an `exp.Add` chaining several intervals, or None when the
        tokens do not form an interval (the parser is retreated in the second early
        exit below).
    """
    index = self._index

    # Note that the INTERVAL token is consumed here if present, regardless of the flag
    if not self._match(TokenType.INTERVAL) and match_interval:
        return None

    if self._match(TokenType.STRING, advance=False):
        this = self._parse_primary()
    else:
        this = self._parse_term()

    # Bail out when nothing was parsed, or when the "value" is an unquoted,
    # unqualified column named IS
    if not this or (
        isinstance(this, exp.Column)
        and not this.table
        and not this.this.quoted
        and this.name.upper() == "IS"
    ):
        self._retreat(index)
        return None

    # The unit is either a function call or, unless an alias token follows, any
    # token read as an upper-cased var
    unit = self._parse_function() or (
        not self._match(TokenType.ALIAS, advance=False)
        and self._parse_var(any_token=True, upper=True)
    )

    # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
    # each INTERVAL expression into this canonical form so it's easy to transpile
    if this and this.is_number:
        this = exp.Literal.string(this.to_py())
    elif this and this.is_string:
        parts = exp.INTERVAL_STRING_RE.findall(this.name)
        if parts and unit:
            # Unconsume the eagerly-parsed unit, since the real unit was part of the string
            unit = None
            self._retreat(self._index - 1)

        # Exactly one match: split the string into its value and unit parts
        if len(parts) == 1:
            this = exp.Literal.string(parts[0][0])
            unit = self.expression(exp.Var, this=parts[0][1].upper())
    if self.INTERVAL_SPANS and self._match_text_seq("TO"):
        unit = self.expression(
            exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
        )

    interval = self.expression(exp.Interval, this=this, unit=unit)

    # Remember the position before the optional "+" so it can be given back below
    index = self._index
    self._match(TokenType.PLUS)

    # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
    if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
        return self.expression(
            exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
        )

    # No further interval part follows: un-consume the "+" if one was matched
    self._retreat(index)
    return interval
this=this, expression=self._parse_term() 4980 ) 4981 elif self._match_pair(TokenType.GT, TokenType.GT): 4982 this = self.expression( 4983 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4984 ) 4985 else: 4986 break 4987 4988 return this 4989 4990 def _parse_term(self) -> t.Optional[exp.Expression]: 4991 this = self._parse_factor() 4992 4993 while self._match_set(self.TERM): 4994 klass = self.TERM[self._prev.token_type] 4995 comments = self._prev_comments 4996 expression = self._parse_factor() 4997 4998 this = self.expression(klass, this=this, comments=comments, expression=expression) 4999 5000 if isinstance(this, exp.Collate): 5001 expr = this.expression 5002 5003 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5004 # fallback to Identifier / Var 5005 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5006 ident = expr.this 5007 if isinstance(ident, exp.Identifier): 5008 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5009 5010 return this 5011 5012 def _parse_factor(self) -> t.Optional[exp.Expression]: 5013 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5014 this = parse_method() 5015 5016 while self._match_set(self.FACTOR): 5017 klass = self.FACTOR[self._prev.token_type] 5018 comments = self._prev_comments 5019 expression = parse_method() 5020 5021 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5022 self._retreat(self._index - 1) 5023 return this 5024 5025 this = self.expression(klass, this=this, comments=comments, expression=expression) 5026 5027 if isinstance(this, exp.Div): 5028 this.args["typed"] = self.dialect.TYPED_DIVISION 5029 this.args["safe"] = self.dialect.SAFE_DIVISION 5030 5031 return this 5032 5033 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5034 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5035 5036 def _parse_unary(self) -> t.Optional[exp.Expression]: 5037 if 
self._match_set(self.UNARY_PARSERS): 5038 return self.UNARY_PARSERS[self._prev.token_type](self) 5039 return self._parse_at_time_zone(self._parse_type()) 5040 5041 def _parse_type( 5042 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5043 ) -> t.Optional[exp.Expression]: 5044 interval = parse_interval and self._parse_interval() 5045 if interval: 5046 return interval 5047 5048 index = self._index 5049 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5050 5051 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5052 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5053 if isinstance(data_type, exp.Cast): 5054 # This constructor can contain ops directly after it, for instance struct unnesting: 5055 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5056 return self._parse_column_ops(data_type) 5057 5058 if data_type: 5059 index2 = self._index 5060 this = self._parse_primary() 5061 5062 if isinstance(this, exp.Literal): 5063 this = self._parse_column_ops(this) 5064 5065 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5066 if parser: 5067 return parser(self, this, data_type) 5068 5069 return self.expression(exp.Cast, this=this, to=data_type) 5070 5071 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5072 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5073 # 5074 # If the index difference here is greater than 1, that means the parser itself must have 5075 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5076 # 5077 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5078 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5079 # callable in the TYPE_CONVERTERS mapping. 
def _parse_types(
    self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
) -> t.Optional[exp.Expression]:
    """Parse a data type, including parameters, nesting, modifiers and array suffixes.

    Args:
        check_func: When True, a type written with parentheses is only accepted if a
            string follows it; otherwise the parser retreats and None is returned.
        schema: When True, values in trailing brackets are kept as part of the type
            instead of triggering a retreat (see the array loop at the end).
        allow_identifiers: When True, a VAR identifier may be re-tokenized into a type
            token, or treated as a user-defined type if the dialect supports those.

    Returns:
        Usually an `exp.DataType`. Also possible: `exp.PseudoType`,
        `exp.ObjectIdentifier`, a cast built from inline values, or None when no type
        could be parsed.
    """
    index = self._index

    this: t.Optional[exp.Expression] = None
    prefix = self._match_text_seq("SYSUDTLIB", ".")

    if not self._match_set(self.TYPE_TOKENS):
        identifier = allow_identifiers and self._parse_id_var(
            any_token=False, tokens=(TokenType.VAR,)
        )
        if isinstance(identifier, exp.Identifier):
            # Re-tokenize the identifier's SQL to see whether it is really a type keyword
            tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

            if len(tokens) != 1:
                self.raise_error("Unexpected identifier", self._prev)

            if tokens[0].token_type in self.TYPE_TOKENS:
                # Pretend the type token was matched so the code below can read it from _prev
                self._prev = tokens[0]
            elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                type_name = identifier.name

                # Collect a dotted, qualified type name
                while self._match(TokenType.DOT):
                    type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                this = exp.DataType.build(type_name, udt=True)
            else:
                # Give back the identifier token that was consumed above
                self._retreat(self._index - 1)
                return None
        else:
            return None

    type_token = self._prev.token_type

    if type_token == TokenType.PSEUDO_TYPE:
        return self.expression(exp.PseudoType, this=self._prev.text.upper())

    if type_token == TokenType.OBJECT_IDENTIFIER:
        return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

    # https://materialize.com/docs/sql/types/map/
    if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
        key_type = self._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )
        if not self._match(TokenType.FARROW):
            self._retreat(index)
            return None

        value_type = self._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )
        if not self._match(TokenType.R_BRACKET):
            self._retreat(index)
            return None

        return exp.DataType(
            this=exp.DataType.Type.MAP,
            expressions=[key_type, value_type],
            nested=True,
            prefix=prefix,
        )

    nested = type_token in self.NESTED_TYPE_TOKENS
    is_struct = type_token in self.STRUCT_TYPE_TOKENS
    is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
    expressions = None
    # Set once a parenthesized parameter list was parsed, i.e. the input might also
    # be a function call rather than a type
    maybe_func = False

    if self._match(TokenType.L_PAREN):
        if is_struct:
            expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
        elif nested:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )
            # NULLABLE(<type>) is unwrapped into the inner type flagged as nullable
            if type_token == TokenType.NULLABLE and len(expressions) == 1:
                this = expressions[0]
                this.set("nullable", True)
                self._match_r_paren()
                return this
        elif type_token in self.ENUM_TYPE_TOKENS:
            expressions = self._parse_csv(self._parse_equality)
        elif is_aggregate:
            # First argument is a function or identifier, the rest are types
            func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
            )
            # NOTE(review): this early exit does not retreat to `index`, unlike the
            # other failure paths in this method - confirm that this is intended
            if not func_or_ident:
                return None
            expressions = [func_or_ident]
            if self._match(TokenType.COMMA):
                expressions.extend(
                    self._parse_csv(
                        lambda: self._parse_types(
                            check_func=check_func,
                            schema=schema,
                            allow_identifiers=allow_identifiers,
                        )
                    )
                )
        else:
            expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

        if not expressions or not self._match(TokenType.R_PAREN):
            self._retreat(index)
            return None

        maybe_func = True

    values: t.Optional[t.List[exp.Expression]] = None

    # Angle-bracket form of nested types, e.g. STRUCT<...>
    if nested and self._match(TokenType.LT):
        if is_struct:
            expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
        else:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )

        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")

        # Inline values following the type, in brackets or parentheses
        if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
            values = self._parse_csv(self._parse_assignment)
            if not values and is_struct:
                values = None
                # Give back the opening bracket/paren that was just consumed
                self._retreat(self._index - 1)
            else:
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

    if type_token in self.TIMESTAMPS:
        if self._match_text_seq("WITH", "TIME", "ZONE"):
            maybe_func = False
            tz_type = (
                exp.DataType.Type.TIMETZ
                if type_token in self.TIMES
                else exp.DataType.Type.TIMESTAMPTZ
            )
            this = exp.DataType(this=tz_type, expressions=expressions)
        elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
            maybe_func = False
            this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
        elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
            maybe_func = False
    elif type_token == TokenType.INTERVAL:
        unit = self._parse_var(upper=True)
        if unit:
            if self._match_text_seq("TO"):
                unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

            this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
        else:
            this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
    elif type_token == TokenType.VOID:
        this = exp.DataType(this=exp.DataType.Type.NULL)

    if maybe_func and check_func:
        # Peek for a string after the parenthesized form; without one, the whole
        # parse is abandoned and the parser retreats to where it started
        index2 = self._index
        peek = self._parse_string()

        if not peek:
            self._retreat(index)
            return None

        self._retreat(index2)

    if not this:
        if self._match_text_seq("UNSIGNED"):
            unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
            if not unsigned_type_token:
                self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

            # Fallback keeps the signed token when raise_error returns instead of raising
            type_token = unsigned_type_token or type_token

        this = exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            prefix=prefix,
        )

        # Empty arrays/structs are allowed
        if values is not None:
            cls = exp.Struct if is_struct else exp.Array
            this = exp.cast(cls(expressions=values), this, copy=False)

    elif expressions:
        this.set("expressions", expressions)

    # https://materialize.com/docs/sql/types/list/#type-name
    while self._match(TokenType.LIST):
        this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

    # From here on `index` marks the position after the base type, used by the retreat below
    index = self._index

    # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
    matched_array = self._match(TokenType.ARRAY)

    while self._curr:
        datatype_token = self._prev.token_type
        matched_l_bracket = self._match(TokenType.L_BRACKET)

        if (not matched_l_bracket and not matched_array) or (
            datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
        ):
            # Postgres allows casting empty arrays such as ARRAY[]::INT[],
            # not to be confused with the fixed size array parsing
            break

        matched_array = False
        values = self._parse_csv(self._parse_assignment) or None
        if (
            values
            and not schema
            and (
                not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
            )
        ):
            # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
            # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
            self._retreat(index)
            break

        this = exp.DataType(
            this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
        )
        self._match(TokenType.R_BRACKET)

    # Apply a registered converter for this type, if there is one
    if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
        converter = self.TYPE_CONVERTERS.get(this.this)
        if converter:
            this = converter(t.cast(exp.DataType, this))

    return this
def _parse_colon_as_variant_extract(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Parse a chain of `:path` extractions on `this` into a variant `exp.JSONExtract`.

    Casts found on a path segment are peeled off and re-applied around the final
    extraction, so the cast wraps the extracted value rather than the path.

    Returns:
        The extraction (possibly wrapped in casts), or `this` when no path was parsed.
    """
    pending_casts = []
    path_segments = []
    escape = None

    while self._match(TokenType.COLON):
        segment_start = self._index

        # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
        key = self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
        path = self._parse_column_ops(key)

        # The cast :: operator has a lower precedence than the extraction operator :, so
        # we rearrange the AST appropriately to avoid casting the JSON path
        while isinstance(path, exp.Cast):
            pending_casts.append(path.to)
            path = path.this

        if pending_casts:
            # The segment's SQL ends right before the first :: token after its start
            dcolon_index = next(
                position
                for position, token in enumerate(self._tokens[segment_start:], segment_start)
                if token.token_type == TokenType.DCOLON
            )
            end_token = self._tokens[dcolon_index - 1]
        else:
            end_token = self._prev

        if not path:
            continue

        # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
        # it'll roundtrip to a string literal in GET_PATH
        if isinstance(path, exp.Identifier) and path.quoted:
            escape = True

        path_segments.append(self._find_sql(self._tokens[segment_start], end_token))

    if not path_segments:
        return this

    # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
    # Databricks transforms it back to the colon/dot notation
    joined_path = exp.Literal.string(".".join(path_segments))
    json_path_expr = self.dialect.to_json_path(joined_path)

    if json_path_expr:
        json_path_expr.set("escape", escape)

    this = self.expression(
        exp.JSONExtract,
        this=this,
        expression=json_path_expr,
        variant_extract=True,
    )

    # Re-apply the casts that were peeled off, last collected first
    while pending_casts:
        this = self.expression(exp.Cast, this=this, to=pending_casts.pop())

    return this
def _parse_primary(self) -> t.Optional[exp.Expression]:
    """Parse a primary expression.

    Covers tokens handled by `PRIMARY_PARSERS` (adjacent strings become one
    `exp.Concat`), numbers written as `.N`, and parenthesized constructs: subqueries,
    tuples and plain parenthesized expressions.

    Returns:
        The parsed expression, or None when the current token starts none of these.
    """
    if self._match_set(self.PRIMARY_PARSERS):
        matched = self._prev
        token_type = matched.token_type
        primary = self.PRIMARY_PARSERS[token_type](self, matched)

        if token_type != TokenType.STRING:
            return primary

        # Consecutive string literals are folded into a single concatenation
        pieces = [primary]
        while self._match(TokenType.STRING):
            pieces.append(exp.Literal.string(self._prev.text))

        if len(pieces) == 1:
            return primary
        return self.expression(exp.Concat, expressions=pieces)

    if self._match_pair(TokenType.DOT, TokenType.NUMBER):
        return exp.Literal.number(f"0.{self._prev.text}")

    if not self._match(TokenType.L_PAREN):
        return None

    comments = self._prev_comments
    query = self._parse_select()
    expressions = [query] if query else self._parse_expressions()

    this = self._parse_query_modifiers(seq_get(expressions, 0))

    if not this and self._match(TokenType.R_PAREN, advance=False):
        # "()" is an empty tuple
        this = self.expression(exp.Tuple)
    elif isinstance(this, exp.UNWRAPPED_QUERIES):
        this = self._parse_subquery(this=this, parse_alias=False)
    elif isinstance(this, exp.Subquery):
        with_set_ops = self._parse_set_operations(this)
        this = self._parse_subquery(this=with_set_ops, parse_alias=False)
    elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
        # A trailing comma makes a one-element tuple
        this = self.expression(exp.Tuple, expressions=expressions)
    else:
        this = self.expression(exp.Paren, this=this)

    if this:
        this.add_comments(comments)

    self._match_r_paren(expression=this)
    return this
def _parse_function_call(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
    any_token: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a function call starting at the current token.

    Args:
        functions: mapping from upper-cased function name to a builder callable;
            `self.FUNCTIONS` is used when this is None.
        anonymous: when True, `FUNCTION_PARSERS` and the builders in `functions`
            are bypassed and the call is built as `exp.Anonymous`.
        optional_parens: when True, names found in `NO_PAREN_FUNCTION_PARSERS` or
            token types found in `NO_PAREN_FUNCTIONS` may be parsed without a
            following parenthesis.
        any_token: when True, any token type outside `RESERVED_TOKENS` may act as
            the function name; otherwise the token type must be in `FUNC_TOKENS`.

    Returns:
        The parsed call after it has been passed through `_parse_window`, or None
        when the current position does not start a function call.
    """
    if not self._curr:
        return None

    # Snapshot the name token before anything is consumed
    comments = self._curr.comments
    token = self._curr
    token_type = self._curr.token_type
    this = self._curr.text
    upper = this.upper()

    parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
    if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
        self._advance()
        return self._parse_window(parser(self))

    # Without a following "(" only the paren-less functions can match
    if not self._next or self._next.token_type != TokenType.L_PAREN:
        if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
            self._advance()
            return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

        return None

    if any_token:
        if token_type in self.RESERVED_TOKENS:
            return None
    elif token_type not in self.FUNC_TOKENS:
        return None

    # Consume the function name and the opening parenthesis
    self._advance(2)

    parser = self.FUNCTION_PARSERS.get(upper)
    if parser and not anonymous:
        this = parser(self)
    else:
        subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

        # A subquery predicate token directly followed by SELECT / WITH wraps a query
        if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
            this = self.expression(
                subquery_predicate, comments=comments, this=self._parse_select()
            )
            self._match_r_paren()
            return this

        if functions is None:
            functions = self.FUNCTIONS

        function = functions.get(upper)
        known_function = function and not anonymous

        # Aliased arguments are parsed for unknown functions and for the names
        # listed in FUNCTIONS_WITH_ALIASED_ARGS
        alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
        args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

        post_func_comments = self._curr and self._curr.comments
        if known_function and post_func_comments:
            # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
            # call we'll construct it as exp.Anonymous, even if it's "known"
            if any(
                comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                for comment in post_func_comments
            ):
                known_function = False

        if alias and known_function:
            args = self._kv_to_prop_eq(args)

        if known_function:
            func_builder = t.cast(t.Callable, function)

            # Builders that have a `dialect` variable name receive the active dialect.
            # NOTE(review): co_varnames also lists local variables, not only parameters
            if "dialect" in func_builder.__code__.co_varnames:
                func = func_builder(args, dialect=self.dialect)
            else:
                func = func_builder(args)

            func = self.validate_expression(func, args)
            if self.dialect.PRESERVE_ORIGINAL_NAMES:
                # Keep the function name exactly as it was written
                func.meta["name"] = this

            this = func
        else:
            if token_type == TokenType.IDENTIFIER:
                this = exp.Identifier(this=this, quoted=True).update_positions(token)
            this = self.expression(exp.Anonymous, this=this, expressions=args)

    if isinstance(this, exp.Expression):
        this.add_comments(comments)

    self._match_r_paren(this)
    return self._parse_window(this)
_to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5725 return expression 5726 5727 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5728 transformed = [] 5729 5730 for index, e in enumerate(expressions): 5731 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5732 if isinstance(e, exp.Alias): 5733 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5734 5735 if not isinstance(e, exp.PropertyEQ): 5736 e = self.expression( 5737 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5738 ) 5739 5740 if isinstance(e.this, exp.Column): 5741 e.this.replace(e.this.this) 5742 else: 5743 e = self._to_prop_eq(e, index) 5744 5745 transformed.append(e) 5746 5747 return transformed 5748 5749 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5750 return self._parse_statement() 5751 5752 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5753 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5754 5755 def _parse_user_defined_function( 5756 self, kind: t.Optional[TokenType] = None 5757 ) -> t.Optional[exp.Expression]: 5758 this = self._parse_table_parts(schema=True) 5759 5760 if not self._match(TokenType.L_PAREN): 5761 return this 5762 5763 expressions = self._parse_csv(self._parse_function_parameter) 5764 self._match_r_paren() 5765 return self.expression( 5766 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5767 ) 5768 5769 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5770 literal = self._parse_primary() 5771 if literal: 5772 return self.expression(exp.Introducer, this=token.text, expression=literal) 5773 5774 return self._identifier_expression(token) 5775 5776 def _parse_session_parameter(self) -> exp.SessionParameter: 5777 kind = None 5778 this = self._parse_id_var() or self._parse_primary() 5779 5780 if this and 
def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """Parse a lambda, falling back to a regular function argument.

    A (possibly parenthesised) list of lambda arguments is read first. If a token
    from `LAMBDAS` follows, the matching builder produces the result. Otherwise
    the parser rewinds to where it started and reads either `DISTINCT <csv>` or
    a select/expression, which is then passed through `_parse_respect_or_ignore_nulls`,
    `_parse_having_max`, `_parse_order` and `_parse_limit`, in that order.

    Args:
        alias: forwarded to `_parse_select_or_expression` on the fallback path.
    """
    start = self._index

    if not self._match(TokenType.L_PAREN):
        params = [self._parse_lambda_arg()]
    else:
        params = t.cast(
            t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
        )
        if not self._match(TokenType.R_PAREN):
            self._retreat(start)

    if self._match_set(self.LAMBDAS):
        build_lambda = self.LAMBDAS[self._prev.token_type]
        return build_lambda(self, params)

    # Not a lambda: rewind and parse an ordinary argument instead
    self._retreat(start)

    argument: t.Optional[exp.Expression]
    if self._match(TokenType.DISTINCT):
        argument = self.expression(
            exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
        )
    else:
        argument = self._parse_select_or_expression(alias=alias)

    argument = self._parse_respect_or_ignore_nulls(argument)
    argument = self._parse_having_max(argument)
    argument = self._parse_order(argument)
    return self._parse_limit(argument)
def _parse_column_def(
    self, this: t.Optional[exp.Expression], computed_column: bool = True
) -> t.Optional[exp.Expression]:
    """Parse the remainder of a column definition whose name is `this`.

    Args:
        this: the already-parsed column name; an `exp.Column` is unwrapped to
            its inner identifier.
        computed_column: when False, an ALIAS token in front of the type is
            consumed and ignored.

    Returns:
        An `exp.ColumnDef`, or `this` unchanged when neither a type nor any
        constraint was found.
    """
    # column defs are not really columns, they're identifiers
    if isinstance(this, exp.Column):
        this = this.this

    if not computed_column:
        self._match(TokenType.ALIAS)

    data_type = self._parse_types(schema=True)

    if self._match_text_seq("FOR", "ORDINALITY"):
        return self.expression(exp.ColumnDef, this=this, ordinality=True)

    column_constraints: t.List[exp.Expression] = []

    is_computed = (not data_type and self._match(TokenType.ALIAS)) or self._match_texts(
        ("ALIAS", "MATERIALIZED")
    )

    if is_computed:
        materialized = self._prev.text.upper() == "MATERIALIZED"
        # Each of the following consumes tokens, so the order must be kept
        computed_expr = self._parse_assignment()
        persisted = materialized or self._match_text_seq("PERSISTED")
        not_null = self._match_pair(TokenType.NOT, TokenType.NULL)
        computed = exp.ComputedColumnConstraint(
            this=computed_expr, persisted=persisted, not_null=not_null
        )
        column_constraints.append(self.expression(exp.ColumnConstraint, kind=computed))
    elif data_type and self._match(TokenType.ALIAS, advance=False):
        if not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT or (
            self._next and self._next.token_type == TokenType.L_PAREN
        ):
            self._advance()
            transform = exp.TransformColumnConstraint(this=self._parse_disjunction())
            column_constraints.append(self.expression(exp.ColumnConstraint, kind=transform))

    parsed = self._parse_column_constraint()
    while parsed:
        column_constraints.append(parsed)
        parsed = self._parse_column_constraint()

    if data_type or column_constraints:
        return self.expression(
            exp.ColumnDef, this=this, kind=data_type, constraints=column_constraints
        )

    return this
def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
    """Parse an inline-length constraint: an optional LENGTH keyword followed by a bitwise expression."""
    self._match_text_seq("LENGTH")
    length = self._parse_bitwise()
    return self.expression(exp.InlineLengthColumnConstraint, this=length)
token 5993 self._retreat(self._index - 1) 5994 return None 5995 5996 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5997 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5998 5999 procedure_option_follows = ( 6000 self._match(TokenType.WITH, advance=False) 6001 and self._next 6002 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6003 ) 6004 6005 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6006 return self.expression( 6007 exp.ColumnConstraint, 6008 this=this, 6009 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6010 ) 6011 6012 return this 6013 6014 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6015 if not self._match(TokenType.CONSTRAINT): 6016 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6017 6018 return self.expression( 6019 exp.Constraint, 6020 this=self._parse_id_var(), 6021 expressions=self._parse_unnamed_constraints(), 6022 ) 6023 6024 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6025 constraints = [] 6026 while True: 6027 constraint = self._parse_unnamed_constraint() or self._parse_function() 6028 if not constraint: 6029 break 6030 constraints.append(constraint) 6031 6032 return constraints 6033 6034 def _parse_unnamed_constraint( 6035 self, constraints: t.Optional[t.Collection[str]] = None 6036 ) -> t.Optional[exp.Expression]: 6037 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6038 constraints or self.CONSTRAINT_PARSERS 6039 ): 6040 return None 6041 6042 constraint = self._prev.text.upper() 6043 if constraint not in self.CONSTRAINT_PARSERS: 6044 self.raise_error(f"No parser found for schema constraint {constraint}.") 6045 6046 return self.CONSTRAINT_PARSERS[constraint](self) 6047 6048 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6049 return self._parse_id_var(any_token=False) 6050 6051 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6052 
def _parse_foreign_key(self) -> exp.ForeignKey:
    """Parse the body of a FOREIGN KEY constraint.

    Reads, in order: a wrapped list of column identifiers (skipped when a
    REFERENCES token follows immediately), the reference returned by
    `_parse_references`, any number of `ON DELETE | UPDATE <action>` clauses,
    and the trailing key constraint options.

    Returns:
        An `exp.ForeignKey`. Each ON clause is passed as a keyword argument
        named after the lower-cased DELETE / UPDATE keyword, holding the
        upper-cased action text.

    Raises:
        Reports a parse error through `raise_error` when ON is not followed by
        DELETE or UPDATE, or when SET is not followed by NULL or DEFAULT.
    """
    expressions = (
        self._parse_wrapped_id_vars()
        if not self._match(TokenType.REFERENCES, advance=False)
        else None
    )
    reference = self._parse_references()
    on_options = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        kind = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            # SET must be followed by NULL or DEFAULT. Without this check a missing
            # keyword left `_prev` on the SET token and produced the action "SET SET".
            if not self._match_set((TokenType.NULL, TokenType.DEFAULT)):
                self.raise_error("Expected NULL or DEFAULT after SET")
            action = "SET " + self._prev.text.upper()
        else:
            # Any other single token (e.g. CASCADE, RESTRICT) is taken as the action
            self._advance()
            action = self._prev.text.upper()

        on_options[kind] = action

    return self.expression(
        exp.ForeignKey,
        expressions=expressions,
        reference=reference,
        options=self._parse_key_constraint_options(),
        **on_options,  # type: ignore
    )
def _parse_odbc_datetime_literal(self) -> exp.Expression:
    """
    Parses an ODBC datetime escape literal. The literal is mapped to the expression
    class registered for its marker in `ODBC_DATETIME_LITERALS`, so for example
    `{d'yyyy-mm-dd'}` is parsed as a `Date`, the same as `DATE('yyyy-mm-dd')`.

    The opening brace has already been consumed by the caller; this method reads
    the marker, the string and the closing brace.

    Reference:
    https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
    """
    self._match(TokenType.VAR)
    marker = self._prev.text.lower()
    literal_class = self.ODBC_DATETIME_LITERALS[marker]
    node = self.expression(exp_class=literal_class, this=self._parse_string())

    if self._match(TokenType.R_BRACE):
        return node

    self.raise_error("Expected }")
    return node
def _parse_case(self) -> t.Optional[exp.Expression]:
    """Parse the body of a CASE expression.

    Reads the optional operand, every `WHEN ... THEN ...` branch, an optional
    ELSE value and the closing END. If END is missing but the ELSE value is an
    `exp.Interval` whose operand renders as `END`, the ELSE value is replaced
    with a column named `interval`; otherwise an error is reported.

    Returns:
        An `exp.Case` carrying the comments of the previously consumed token.
    """
    case_comments = self._prev_comments
    operand = self._parse_assignment()

    branches = []
    while self._match(TokenType.WHEN):
        condition = self._parse_assignment()
        self._match(TokenType.THEN)
        outcome = self._parse_assignment()
        branches.append(self.expression(exp.If, this=condition, true=outcome))

    fallback = self._parse_assignment() if self._match(TokenType.ELSE) else None

    if not self._match(TokenType.END):
        swallowed_end = (
            isinstance(fallback, exp.Interval) and fallback.this.sql().upper() == "END"
        )
        if not swallowed_end:
            self.raise_error("Expected END after CASE", self._prev)
        else:
            fallback = exp.column("interval")

    return self.expression(
        exp.Case, comments=case_comments, this=operand, ifs=branches, default=fallback
    )
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """Parse the arguments of EXTRACT.

    The part to extract is a function call or an upper-cased var/string. It must
    be followed by FROM or a comma (an error is reported otherwise), and then by
    the bitwise expression to extract from.
    """
    part = self._parse_function() or self._parse_var_or_string(upper=True)

    # FROM is tried first; the comma is only looked for when FROM is absent
    if not self._match(TokenType.FROM) and not self._match(TokenType.COMMA):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    return self.expression(exp.Extract, this=part, expression=self._parse_bitwise())
def _parse_string_agg(self) -> exp.GroupConcat:
    """Parse the arguments of a STRING_AGG / LISTAGG style call into `exp.GroupConcat`.

    Supports a leading DISTINCT, an optional `ON OVERFLOW` clause, an ORDER BY /
    LIMIT written inside the parentheses, and a trailing
    `WITHIN GROUP (ORDER BY ...)`.
    """
    if self._match(TokenType.DISTINCT):
        # Only the first argument is wrapped in Distinct; the rest follow a comma
        args: t.List[t.Optional[exp.Expression]] = [
            self.expression(exp.Distinct, expressions=[self._parse_assignment()])
        ]
        if self._match(TokenType.COMMA):
            args.extend(self._parse_csv(self._parse_assignment))
    else:
        args = self._parse_csv(self._parse_assignment)  # type: ignore

    if self._match_text_seq("ON", "OVERFLOW"):
        # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
        if self._match_text_seq("ERROR"):
            on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
        else:
            self._match_text_seq("TRUNCATE")
            on_overflow = self.expression(
                exp.OverflowTruncateBehavior,
                this=self._parse_string(),
                # with_count is truthy unless WITHOUT COUNT is given explicitly
                with_count=(
                    self._match_text_seq("WITH", "COUNT")
                    or not self._match_text_seq("WITHOUT", "COUNT")
                ),
            )
    else:
        on_overflow = None

    # Remember the position so the closing parenthesis can be un-consumed below
    index = self._index
    if not self._match(TokenType.R_PAREN) and args:
        # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
        # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
        # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
        args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
        # NOTE(review): on_overflow is not forwarded on this path - confirm that is intended
        return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

    # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
    # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
    # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
    if not self._match_text_seq("WITHIN", "GROUP"):
        self._retreat(index)
        return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

    # The corresponding match_r_paren will be called in parse_function (caller)
    self._match_l_paren()

    return self.expression(
        exp.GroupConcat,
        this=self._parse_order(this=seq_get(args, 0)),
        separator=seq_get(args, 1),
        on_overflow=on_overflow,
    )
def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
    """
    Parse the argument list of a DECODE call.

    Two variants are recognised:

    - DECODE(bin, charset), produced when fewer than three arguments are given
    - DECODE(expression, search, result [, search, result] ... [, default])

    The second variant is always turned into a CASE expression. A NULL search
    value is compared with `IS NULL`, and a non-literal search value gets an
    extra "both sides are NULL" alternative, because plain equality would not
    match NULL.

    Returns None when the subject, a search value or a result is missing.
    """
    operands = self._parse_csv(self._parse_assignment)

    if len(operands) < 3:
        return self.expression(
            exp.Decode, this=seq_get(operands, 0), charset=seq_get(operands, 1)
        )

    subject, *rest = operands
    if not subject:
        return None

    branches = []
    # Zipping one iterator with itself walks the (search, result) pairs and
    # leaves out a trailing unpaired default.
    cursor = iter(rest)
    for search, result in zip(cursor, cursor):
        if not search or not result:
            return None

        if isinstance(search, exp.Literal):
            condition = exp.EQ(this=subject.copy(), expression=search)
        elif isinstance(search, exp.Null):
            condition = exp.Is(this=subject.copy(), expression=exp.Null())
        else:
            equal = exp.EQ(this=subject.copy(), expression=search)
            both_null = exp.and_(
                exp.Is(this=subject.copy(), expression=exp.Null()),
                exp.Is(this=search.copy(), expression=exp.Null()),
                copy=False,
            )
            condition = exp.or_(equal, both_null, copy=False)

        branches.append(exp.If(this=condition, true=result))

    # An odd number of trailing arguments means the last one is the default.
    default = rest[-1] if len(rest) % 2 == 1 else None
    return exp.Case(ifs=branches, default=default)
def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
    """Parse the ``... ON EMPTY`` / ``... ON ERROR`` / ``... ON NULL`` clauses.

    Returns:
        An ``exp.OnCondition`` holding whichever handlers were found, or None
        when none of the three clauses is present.
    """
    # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
    if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
        leading = ("EMPTY", "ERROR")
    else:
        leading = ("ERROR", "EMPTY")

    # The NULL handler is always attempted last, after the dialect-ordered pair.
    handlers = {}
    for keyword in (*leading, "NULL"):
        handlers[keyword] = self._parse_on_handling(keyword, *self.ON_CONDITION_TOKENS)

    if not any(handlers.values()):
        return None

    return self.expression(
        exp.OnCondition,
        empty=handlers["EMPTY"],
        error=handlers["ERROR"],
        null=handlers["NULL"],
    )
def _parse_json_column_def(self) -> exp.JSONColumnDef:
    """Parse one column definition of a JSON table schema.

    A definition is either ``NESTED [PATH <string>] <schema>`` or
    ``<name> <type> [PATH <string>]``.

    Returns:
        An ``exp.JSONColumnDef``. For the NESTED form ``this`` and ``kind``
        are None and ``nested_schema`` holds the parsed inner schema.
    """
    if self._match_text_seq("NESTED"):
        this, kind, nested = None, None, True
    else:
        this = self._parse_id_var()
        kind = self._parse_types(allow_identifiers=False)
        nested = None

    # Both values short-circuit to a falsy result when their clause is absent.
    path = self._match_text_seq("PATH") and self._parse_string()
    nested_schema = nested and self._parse_json_schema()

    return self.expression(
        exp.JSONColumnDef,
        this=this,
        kind=kind,
        path=path,
        nested_schema=nested_schema,
    )
def _parse_match_against(self) -> exp.MatchAgainst:
    """Parse ``<columns>) AGAINST (<string> [<search modifier>]``.

    Returns:
        An ``exp.MatchAgainst`` whose ``expressions`` are the matched columns,
        ``this`` is the search string and ``modifier`` is the search-modifier
        text, or None when no modifier is given.
    """
    columns = self._parse_csv(self._parse_column)

    # Step over the end of the column list and into the AGAINST argument list.
    self._match_text_seq(")", "AGAINST", "(")

    search = self._parse_string()

    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        expanded = self._match_text_seq("WITH", "QUERY", "EXPANSION")
        modifier = (
            "IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION"
            if expanded
            else "IN NATURAL LANGUAGE MODE"
        )
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    else:
        modifier = (
            "WITH QUERY EXPANSION"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION")
            else None
        )

    return self.expression(
        exp.MatchAgainst, this=search, expressions=columns, modifier=modifier
    )
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """Parse the arguments of a POSITION-like function.

    Both ``<needle> IN <haystack>`` and the comma-separated form are handled.

    Args:
        haystack_first: In the comma-separated form, whether the string being
            searched comes before the substring.

    Returns:
        An ``exp.StrPosition``. The optional third comma-separated argument is
        stored as ``position``.
    """
    args = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        return self.expression(
            exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
        )

    first, second = seq_get(args, 0), seq_get(args, 1)
    haystack, needle = (first, second) if haystack_first else (second, first)

    return self.expression(
        exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
    )
def _parse_window(
    self, this: t.Optional[exp.Expression], alias: bool = False
) -> t.Optional[exp.Expression]:
    """Parse the window-related clauses that may follow `this`.

    In order, this handles WITHIN GROUP (...), FILTER (...), IGNORE/RESPECT NULLS
    and finally the window itself, introduced by one of WINDOW_BEFORE_PAREN_TOKENS
    (or, for named windows, by an optional alias token).

    Args:
        this: The expression parsed so far (typically a function call).
        alias: When True, a named window definition is parsed, i.e. no leading
            window keyword is required and `over` is left as None.

    Returns:
        `this` (possibly wrapped by the clauses above) when no window follows,
        otherwise an exp.Window node.
    """
    # Keep a handle on the original node: its comments are moved onto the Window below
    func = this
    comments = func.comments if isinstance(func, exp.Expression) else None

    # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
    if self._match_text_seq("WITHIN", "GROUP"):
        order = self._parse_wrapped(self._parse_order)
        this = self.expression(exp.WithinGroup, this=this, expression=order)

    if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
        # The WHERE keyword is consumed here, so _parse_where is told to skip it
        self._match(TokenType.WHERE)
        this = self.expression(
            exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
        )
        self._match_r_paren()

    # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
    # Some dialects choose to implement and some do not.
    # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

    # There is some code above in _parse_lambda that handles
    # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

    # The below changes handle
    # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

    # Oracle allows both formats
    # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
    # and Snowflake chose to do the same for familiarity
    # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
    if isinstance(this, exp.AggFunc):
        ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

        # An IGNORE/RESPECT NULLS node found inside the aggregate is swapped for its
        # own operand, and a node of the same class is re-created around the aggregate
        if ignore_respect and ignore_respect is not this:
            ignore_respect.replace(ignore_respect.this)
            this = self.expression(ignore_respect.__class__, this=this)

    this = self._parse_respect_or_ignore_nulls(this)

    # bigquery select from window x AS (partition by ...)
    if alias:
        over = None
        self._match(TokenType.ALIAS)
    elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
        # Nothing window-like follows: return what has been built so far
        return this
    else:
        over = self._prev.text.upper()

    # The comments captured above are passed to the Window node, so drop them from the function
    if comments and isinstance(func, exp.Expression):
        func.pop_comments()

    # No opening parenthesis: only an identifier is parsed and stored as the window's alias
    if not self._match(TokenType.L_PAREN):
        return self.expression(
            exp.Window,
            comments=comments,
            this=this,
            alias=self._parse_id_var(False),
            over=over,
        )

    window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

    # `first` is truthy for FIRST, False for LAST and falsy when neither keyword is present
    first = self._match(TokenType.FIRST)
    if self._match_text_seq("LAST"):
        first = False

    partition, order = self._parse_partition_and_order()
    kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

    if kind:
        # Frame clause: [BETWEEN] <start> [AND <end>] [EXCLUDE <option>]
        self._match(TokenType.BETWEEN)
        start = self._parse_window_spec()
        self._match(TokenType.AND)
        end = self._parse_window_spec()
        exclude = (
            self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
            if self._match_text_seq("EXCLUDE")
            else None
        )

        spec = self.expression(
            exp.WindowSpec,
            kind=kind,
            start=start["value"],
            start_side=start["side"],
            end=end["value"],
            end_side=end["side"],
            exclude=exclude,
        )
    else:
        spec = None

    self._match_r_paren()

    window = self.expression(
        exp.Window,
        comments=comments,
        this=this,
        partition_by=partition,
        order=order,
        spec=spec,
        alias=window_alias,
        over=over,
        first=first,
    )

    # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
    if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
        return self._parse_window(window, alias=alias)

    return window
def _parse_id_var(
    self,
    any_token: bool = True,
    tokens: t.Optional[t.Collection[TokenType]] = None,
) -> t.Optional[exp.Expression]:
    """Parse an identifier, or a token that is allowed to act as one.

    Args:
        any_token: When true, try to take the next token through
            ``_advance_any`` before falling back to ``tokens``.
        tokens: Token types accepted as identifiers; ``ID_VAR_TOKENS`` is used
            when this is empty or None.

    Returns:
        The identifier expression, or the falsy result of ``_parse_identifier``
        when nothing suitable was found.
    """
    identifier = self._parse_identifier()
    if identifier:
        return identifier

    consumed = (any_token and self._advance_any()) or self._match_set(
        tokens or self.ID_VAR_TOKENS
    )
    if not consumed:
        return identifier

    # A string token used as an identifier is marked as quoted.
    was_string = self._prev.token_type == TokenType.STRING
    return self._identifier_expression(quoted=was_string)
def _parse_boolean(self) -> t.Optional[exp.Expression]:
    """Parse a TRUE or FALSE token, falling back to a placeholder.

    Returns:
        The result of the matching ``PRIMARY_PARSERS`` entry for the consumed
        token, otherwise whatever ``_parse_placeholder`` returns.
    """
    for token_type in (TokenType.TRUE, TokenType.FALSE):
        if self._match(token_type):
            build = self.PRIMARY_PARSERS[token_type]
            return build(self, self._prev)

    return self._parse_placeholder()
def _parse_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
) -> t.List[exp.Expression]:
    """Parse a list of items separated by `sep`.

    Args:
        parse_method: Zero-argument callable that parses a single item.
        sep: Token type that separates the items.

    Returns:
        The parsed items in order; items for which `parse_method` returned
        None are left out.
    """
    latest = parse_method()
    parsed = [] if latest is None else [latest]

    while self._match(sep):
        # The item parsed before the separator is handed to _add_comments,
        # even when that item is None.
        self._add_comments(latest)
        latest = parse_method()
        if latest is not None:
            parsed.append(latest)

    return parsed
def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
    """Parse the remainder of a COMMIT or ROLLBACK statement.

    The statement kind is read from the previously consumed token. Optional
    ``TRANSACTION``/``WORK``, ``TO [SAVEPOINT] <name>`` and
    ``AND [NO] CHAIN`` clauses are consumed for both kinds.

    Returns:
        An ``exp.Rollback`` carrying the savepoint, or an ``exp.Commit``
        carrying the chain flag.
    """
    statement_type = self._prev.token_type

    self._match_texts(("TRANSACTION", "WORK"))

    savepoint = None
    if self._match_text_seq("TO"):
        self._match_text_seq("SAVEPOINT")
        savepoint = self._parse_id_var()

    chain = None
    if self._match(TokenType.AND):
        negated = self._match_text_seq("NO")
        chain = not negated
        self._match_text_seq("CHAIN")

    if statement_type == TokenType.ROLLBACK:
        return self.expression(exp.Rollback, savepoint=savepoint)

    return self.expression(exp.Commit, chain=chain)
def _parse_alter_table_add(self) -> t.List[exp.Expression]:
    """Parse the ADD action(s) of an ALTER statement.

    The forms below are tried in order:
      1. constraints, when the current token is in ADD_CONSTRAINT_TOKENS;
      2. a single ADD followed by field definitions, for dialects where
         ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN is falsy;
      3. ADD COLUMNS followed by a schema;
      4. a list of individual ADD [COLUMN] clauses.

    Returns:
        The parsed actions; an empty list when ADD COLUMNS has no schema.
    """
    # One token before the current position; the _retreat below rewinds to it so that
    # the ADD keyword can be matched again.
    # NOTE(review): presumably the caller has already consumed ADD - confirm
    index = self._index - 1

    # advance=False only peeks at the current token, the constraint parser consumes it
    if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
        return self._parse_csv(
            lambda: self.expression(
                exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
            )
        )

    self._retreat(index)
    if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
        return self._parse_wrapped_csv(self._parse_field_def, optional=True)

    if self._match_text_seq("ADD", "COLUMNS"):
        schema = self._parse_schema()
        if schema:
            return [schema]
        return []

    # Each item is parsed by _parse_add_column, which matches its own ADD keyword
    return self._parse_wrapped_csv(self._parse_add_column, optional=True)
def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
    """Parse the distribution style given in an ALTER statement.

    Returns:
        An ``exp.AlterDistStyle`` whose ``this`` is the upper-cased
        ALL / EVEN / AUTO keyword as a var, or otherwise the column parsed
        after an optional ``KEY DISTKEY``.
    """
    if self._match_texts(("ALL", "EVEN", "AUTO")):
        style = exp.var(self._prev.text.upper())
    else:
        self._match_text_seq("KEY", "DISTKEY")
        style = self._parse_column()

    return self.expression(exp.AlterDistStyle, this=style)
def _parse_alter_table_set(self) -> exp.AlterSet:
    """Parse the SET action of an ALTER statement into an exp.AlterSet node.

    The branches are tried in order and the first one that matches decides which
    argument of the node gets populated. When none of the keywords match, the
    remainder is parsed as properties, optionally preceded by a SERDE clause.
    """
    alter_set = self.expression(exp.AlterSet)

    # A parenthesized list (peeked, not consumed) or TABLE PROPERTIES (...)
    if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
        "TABLE", "PROPERTIES"
    ):
        alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
    # advance=False: FILESTREAM_ON is left in place so it is parsed as part of the assignment
    elif self._match_text_seq("FILESTREAM_ON", advance=False):
        alter_set.set("expressions", [self._parse_assignment()])
    elif self._match_texts(("LOGGED", "UNLOGGED")):
        alter_set.set("option", exp.var(self._prev.text.upper()))
    elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
        alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
    elif self._match_text_seq("LOCATION"):
        alter_set.set("location", self._parse_field())
    elif self._match_text_seq("ACCESS", "METHOD"):
        alter_set.set("access_method", self._parse_field())
    elif self._match_text_seq("TABLESPACE"):
        alter_set.set("tablespace", self._parse_field())
    elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
        alter_set.set("file_format", [self._parse_field()])
    elif self._match_text_seq("STAGE_FILE_FORMAT"):
        alter_set.set("file_format", self._parse_wrapped_options())
    elif self._match_text_seq("STAGE_COPY_OPTIONS"):
        alter_set.set("copy_options", self._parse_wrapped_options())
    elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
        alter_set.set("tag", self._parse_csv(self._parse_assignment))
    else:
        # Fallback: optional SERDE <field>, then whatever properties follow
        if self._match_text_seq("SERDE"):
            alter_set.set("serde", self._parse_field())

        alter_set.set("expressions", [self._parse_properties()])

    return alter_set
self._parse_as_command(start) 7427 7428 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7429 start = self._prev 7430 # https://duckdb.org/docs/sql/statements/analyze 7431 if not self._curr: 7432 return self.expression(exp.Analyze) 7433 7434 options = [] 7435 while self._match_texts(self.ANALYZE_STYLES): 7436 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7437 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7438 else: 7439 options.append(self._prev.text.upper()) 7440 7441 this: t.Optional[exp.Expression] = None 7442 inner_expression: t.Optional[exp.Expression] = None 7443 7444 kind = self._curr and self._curr.text.upper() 7445 7446 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7447 this = self._parse_table_parts() 7448 elif self._match_text_seq("TABLES"): 7449 if self._match_set((TokenType.FROM, TokenType.IN)): 7450 kind = f"{kind} {self._prev.text.upper()}" 7451 this = self._parse_table(schema=True, is_db_reference=True) 7452 elif self._match_text_seq("DATABASE"): 7453 this = self._parse_table(schema=True, is_db_reference=True) 7454 elif self._match_text_seq("CLUSTER"): 7455 this = self._parse_table() 7456 # Try matching inner expr keywords before fallback to parse table. 
7457 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7458 kind = None 7459 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7460 else: 7461 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7462 kind = None 7463 this = self._parse_table_parts() 7464 7465 partition = self._try_parse(self._parse_partition) 7466 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7467 return self._parse_as_command(start) 7468 7469 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7470 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7471 "WITH", "ASYNC", "MODE" 7472 ): 7473 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7474 else: 7475 mode = None 7476 7477 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7478 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7479 7480 properties = self._parse_properties() 7481 return self.expression( 7482 exp.Analyze, 7483 kind=kind, 7484 this=this, 7485 mode=mode, 7486 partition=partition, 7487 properties=properties, 7488 expression=inner_expression, 7489 options=options, 7490 ) 7491 7492 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7493 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7494 this = None 7495 kind = self._prev.text.upper() 7496 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7497 expressions = [] 7498 7499 if not self._match_text_seq("STATISTICS"): 7500 self.raise_error("Expecting token STATISTICS") 7501 7502 if self._match_text_seq("NOSCAN"): 7503 this = "NOSCAN" 7504 elif self._match(TokenType.FOR): 7505 if self._match_text_seq("ALL", "COLUMNS"): 7506 this = "FOR ALL COLUMNS" 7507 if self._match_texts("COLUMNS"): 7508 this = "FOR COLUMNS" 7509 expressions = self._parse_csv(self._parse_column_reference) 7510 elif self._match_text_seq("SAMPLE"): 7511 sample = self._parse_number() 
7512 expressions = [ 7513 self.expression( 7514 exp.AnalyzeSample, 7515 sample=sample, 7516 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7517 ) 7518 ] 7519 7520 return self.expression( 7521 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7522 ) 7523 7524 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7525 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7526 kind = None 7527 this = None 7528 expression: t.Optional[exp.Expression] = None 7529 if self._match_text_seq("REF", "UPDATE"): 7530 kind = "REF" 7531 this = "UPDATE" 7532 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7533 this = "UPDATE SET DANGLING TO NULL" 7534 elif self._match_text_seq("STRUCTURE"): 7535 kind = "STRUCTURE" 7536 if self._match_text_seq("CASCADE", "FAST"): 7537 this = "CASCADE FAST" 7538 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7539 ("ONLINE", "OFFLINE") 7540 ): 7541 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7542 expression = self._parse_into() 7543 7544 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7545 7546 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7547 this = self._prev.text.upper() 7548 if self._match_text_seq("COLUMNS"): 7549 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7550 return None 7551 7552 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7553 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7554 if self._match_text_seq("STATISTICS"): 7555 return self.expression(exp.AnalyzeDelete, kind=kind) 7556 return None 7557 7558 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7559 if self._match_text_seq("CHAINED", "ROWS"): 7560 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7561 return None 7562 7563 # 
https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7564 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7565 this = self._prev.text.upper() 7566 expression: t.Optional[exp.Expression] = None 7567 expressions = [] 7568 update_options = None 7569 7570 if self._match_text_seq("HISTOGRAM", "ON"): 7571 expressions = self._parse_csv(self._parse_column_reference) 7572 with_expressions = [] 7573 while self._match(TokenType.WITH): 7574 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7575 if self._match_texts(("SYNC", "ASYNC")): 7576 if self._match_text_seq("MODE", advance=False): 7577 with_expressions.append(f"{self._prev.text.upper()} MODE") 7578 self._advance() 7579 else: 7580 buckets = self._parse_number() 7581 if self._match_text_seq("BUCKETS"): 7582 with_expressions.append(f"{buckets} BUCKETS") 7583 if with_expressions: 7584 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7585 7586 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7587 TokenType.UPDATE, advance=False 7588 ): 7589 update_options = self._prev.text.upper() 7590 self._advance() 7591 elif self._match_text_seq("USING", "DATA"): 7592 expression = self.expression(exp.UsingData, this=self._parse_string()) 7593 7594 return self.expression( 7595 exp.AnalyzeHistogram, 7596 this=this, 7597 expressions=expressions, 7598 expression=expression, 7599 update_options=update_options, 7600 ) 7601 7602 def _parse_merge(self) -> exp.Merge: 7603 self._match(TokenType.INTO) 7604 target = self._parse_table() 7605 7606 if target and self._match(TokenType.ALIAS, advance=False): 7607 target.set("alias", self._parse_table_alias()) 7608 7609 self._match(TokenType.USING) 7610 using = self._parse_table() 7611 7612 self._match(TokenType.ON) 7613 on = self._parse_assignment() 7614 7615 return self.expression( 7616 exp.Merge, 7617 this=target, 7618 using=using, 7619 on=on, 7620 whens=self._parse_when_matched(), 7621 
returning=self._parse_returning(), 7622 ) 7623 7624 def _parse_when_matched(self) -> exp.Whens: 7625 whens = [] 7626 7627 while self._match(TokenType.WHEN): 7628 matched = not self._match(TokenType.NOT) 7629 self._match_text_seq("MATCHED") 7630 source = ( 7631 False 7632 if self._match_text_seq("BY", "TARGET") 7633 else self._match_text_seq("BY", "SOURCE") 7634 ) 7635 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7636 7637 self._match(TokenType.THEN) 7638 7639 if self._match(TokenType.INSERT): 7640 this = self._parse_star() 7641 if this: 7642 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7643 else: 7644 then = self.expression( 7645 exp.Insert, 7646 this=exp.var("ROW") 7647 if self._match_text_seq("ROW") 7648 else self._parse_value(values=False), 7649 expression=self._match_text_seq("VALUES") and self._parse_value(), 7650 ) 7651 elif self._match(TokenType.UPDATE): 7652 expressions = self._parse_star() 7653 if expressions: 7654 then = self.expression(exp.Update, expressions=expressions) 7655 else: 7656 then = self.expression( 7657 exp.Update, 7658 expressions=self._match(TokenType.SET) 7659 and self._parse_csv(self._parse_equality), 7660 ) 7661 elif self._match(TokenType.DELETE): 7662 then = self.expression(exp.Var, this=self._prev.text) 7663 else: 7664 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7665 7666 whens.append( 7667 self.expression( 7668 exp.When, 7669 matched=matched, 7670 source=source, 7671 condition=condition, 7672 then=then, 7673 ) 7674 ) 7675 return self.expression(exp.Whens, expressions=whens) 7676 7677 def _parse_show(self) -> t.Optional[exp.Expression]: 7678 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7679 if parser: 7680 return parser(self) 7681 return self._parse_as_command(self._prev) 7682 7683 def _parse_set_item_assignment( 7684 self, kind: t.Optional[str] = None 7685 ) -> t.Optional[exp.Expression]: 7686 index = self._index 7687 7688 if kind in 
("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7689 return self._parse_set_transaction(global_=kind == "GLOBAL") 7690 7691 left = self._parse_primary() or self._parse_column() 7692 assignment_delimiter = self._match_texts(("=", "TO")) 7693 7694 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7695 self._retreat(index) 7696 return None 7697 7698 right = self._parse_statement() or self._parse_id_var() 7699 if isinstance(right, (exp.Column, exp.Identifier)): 7700 right = exp.var(right.name) 7701 7702 this = self.expression(exp.EQ, this=left, expression=right) 7703 return self.expression(exp.SetItem, this=this, kind=kind) 7704 7705 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7706 self._match_text_seq("TRANSACTION") 7707 characteristics = self._parse_csv( 7708 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7709 ) 7710 return self.expression( 7711 exp.SetItem, 7712 expressions=characteristics, 7713 kind="TRANSACTION", 7714 **{"global": global_}, # type: ignore 7715 ) 7716 7717 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7718 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7719 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7720 7721 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7722 index = self._index 7723 set_ = self.expression( 7724 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7725 ) 7726 7727 if self._curr: 7728 self._retreat(index) 7729 return self._parse_as_command(self._prev) 7730 7731 return set_ 7732 7733 def _parse_var_from_options( 7734 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7735 ) -> t.Optional[exp.Var]: 7736 start = self._curr 7737 if not start: 7738 return None 7739 7740 option = start.text.upper() 7741 continuations = options.get(option) 7742 7743 index = self._index 7744 self._advance() 7745 for 
keywords in continuations or []: 7746 if isinstance(keywords, str): 7747 keywords = (keywords,) 7748 7749 if self._match_text_seq(*keywords): 7750 option = f"{option} {' '.join(keywords)}" 7751 break 7752 else: 7753 if continuations or continuations is None: 7754 if raise_unmatched: 7755 self.raise_error(f"Unknown option {option}") 7756 7757 self._retreat(index) 7758 return None 7759 7760 return exp.var(option) 7761 7762 def _parse_as_command(self, start: Token) -> exp.Command: 7763 while self._curr: 7764 self._advance() 7765 text = self._find_sql(start, self._prev) 7766 size = len(start.text) 7767 self._warn_unsupported() 7768 return exp.Command(this=text[:size], expression=text[size:]) 7769 7770 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7771 settings = [] 7772 7773 self._match_l_paren() 7774 kind = self._parse_id_var() 7775 7776 if self._match(TokenType.L_PAREN): 7777 while True: 7778 key = self._parse_id_var() 7779 value = self._parse_primary() 7780 if not key and value is None: 7781 break 7782 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7783 self._match(TokenType.R_PAREN) 7784 7785 self._match_r_paren() 7786 7787 return self.expression( 7788 exp.DictProperty, 7789 this=this, 7790 kind=kind.this if kind else None, 7791 settings=settings, 7792 ) 7793 7794 def _parse_dict_range(self, this: str) -> exp.DictRange: 7795 self._match_l_paren() 7796 has_min = self._match_text_seq("MIN") 7797 if has_min: 7798 min = self._parse_var() or self._parse_primary() 7799 self._match_text_seq("MAX") 7800 max = self._parse_var() or self._parse_primary() 7801 else: 7802 max = self._parse_var() or self._parse_primary() 7803 min = exp.Literal.number(0) 7804 self._match_r_paren() 7805 return self.expression(exp.DictRange, this=this, min=min, max=max) 7806 7807 def _parse_comprehension( 7808 self, this: t.Optional[exp.Expression] 7809 ) -> t.Optional[exp.Comprehension]: 7810 index = self._index 7811 expression = self._parse_column() 
7812 if not self._match(TokenType.IN): 7813 self._retreat(index - 1) 7814 return None 7815 iterator = self._parse_column() 7816 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7817 return self.expression( 7818 exp.Comprehension, 7819 this=this, 7820 expression=expression, 7821 iterator=iterator, 7822 condition=condition, 7823 ) 7824 7825 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7826 if self._match(TokenType.HEREDOC_STRING): 7827 return self.expression(exp.Heredoc, this=self._prev.text) 7828 7829 if not self._match_text_seq("$"): 7830 return None 7831 7832 tags = ["$"] 7833 tag_text = None 7834 7835 if self._is_connected(): 7836 self._advance() 7837 tags.append(self._prev.text.upper()) 7838 else: 7839 self.raise_error("No closing $ found") 7840 7841 if tags[-1] != "$": 7842 if self._is_connected() and self._match_text_seq("$"): 7843 tag_text = tags[-1] 7844 tags.append("$") 7845 else: 7846 self.raise_error("No closing $ found") 7847 7848 heredoc_start = self._curr 7849 7850 while self._curr: 7851 if self._match_text_seq(*tags, advance=False): 7852 this = self._find_sql(heredoc_start, self._prev) 7853 self._advance(len(tags)) 7854 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7855 7856 self._advance() 7857 7858 self.raise_error(f"No closing {''.join(tags)} found") 7859 return None 7860 7861 def _find_parser( 7862 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7863 ) -> t.Optional[t.Callable]: 7864 if not self._curr: 7865 return None 7866 7867 index = self._index 7868 this = [] 7869 while True: 7870 # The current token might be multiple words 7871 curr = self._curr.text.upper() 7872 key = curr.split(" ") 7873 this.append(curr) 7874 7875 self._advance() 7876 result, trie = in_trie(trie, key) 7877 if result == TrieResult.FAILED: 7878 break 7879 7880 if result == TrieResult.EXISTS: 7881 subparser = parsers[" ".join(this)] 7882 return subparser 7883 7884 self._retreat(index) 7885 return None 7886 7887 def 
_match(self, token_type, advance=True, expression=None): 7888 if not self._curr: 7889 return None 7890 7891 if self._curr.token_type == token_type: 7892 if advance: 7893 self._advance() 7894 self._add_comments(expression) 7895 return True 7896 7897 return None 7898 7899 def _match_set(self, types, advance=True): 7900 if not self._curr: 7901 return None 7902 7903 if self._curr.token_type in types: 7904 if advance: 7905 self._advance() 7906 return True 7907 7908 return None 7909 7910 def _match_pair(self, token_type_a, token_type_b, advance=True): 7911 if not self._curr or not self._next: 7912 return None 7913 7914 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7915 if advance: 7916 self._advance(2) 7917 return True 7918 7919 return None 7920 7921 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7922 if not self._match(TokenType.L_PAREN, expression=expression): 7923 self.raise_error("Expecting (") 7924 7925 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7926 if not self._match(TokenType.R_PAREN, expression=expression): 7927 self.raise_error("Expecting )") 7928 7929 def _match_texts(self, texts, advance=True): 7930 if ( 7931 self._curr 7932 and self._curr.token_type != TokenType.STRING 7933 and self._curr.text.upper() in texts 7934 ): 7935 if advance: 7936 self._advance() 7937 return True 7938 return None 7939 7940 def _match_text_seq(self, *texts, advance=True): 7941 index = self._index 7942 for text in texts: 7943 if ( 7944 self._curr 7945 and self._curr.token_type != TokenType.STRING 7946 and self._curr.text.upper() == text 7947 ): 7948 self._advance() 7949 else: 7950 self._retreat(index) 7951 return None 7952 7953 if not advance: 7954 self._retreat(index) 7955 7956 return True 7957 7958 def _replace_lambda( 7959 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7960 ) -> t.Optional[exp.Expression]: 7961 if not node: 7962 return node 7963 
7964 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7965 7966 for column in node.find_all(exp.Column): 7967 typ = lambda_types.get(column.parts[0].name) 7968 if typ is not None: 7969 dot_or_id = column.to_dot() if column.table else column.this 7970 7971 if typ: 7972 dot_or_id = self.expression( 7973 exp.Cast, 7974 this=dot_or_id, 7975 to=typ, 7976 ) 7977 7978 parent = column.parent 7979 7980 while isinstance(parent, exp.Dot): 7981 if not isinstance(parent.parent, exp.Dot): 7982 parent.replace(dot_or_id) 7983 break 7984 parent = parent.parent 7985 else: 7986 if column is node: 7987 node = dot_or_id 7988 else: 7989 column.replace(dot_or_id) 7990 return node 7991 7992 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7993 start = self._prev 7994 7995 # Not to be confused with TRUNCATE(number, decimals) function call 7996 if self._match(TokenType.L_PAREN): 7997 self._retreat(self._index - 2) 7998 return self._parse_function() 7999 8000 # Clickhouse supports TRUNCATE DATABASE as well 8001 is_database = self._match(TokenType.DATABASE) 8002 8003 self._match(TokenType.TABLE) 8004 8005 exists = self._parse_exists(not_=False) 8006 8007 expressions = self._parse_csv( 8008 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8009 ) 8010 8011 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8012 8013 if self._match_text_seq("RESTART", "IDENTITY"): 8014 identity = "RESTART" 8015 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8016 identity = "CONTINUE" 8017 else: 8018 identity = None 8019 8020 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8021 option = self._prev.text 8022 else: 8023 option = None 8024 8025 partition = self._parse_partition() 8026 8027 # Fallback case 8028 if self._curr: 8029 return self._parse_as_command(start) 8030 8031 return self.expression( 8032 exp.TruncateTable, 8033 expressions=expressions, 8034 is_database=is_database, 8035 
exists=exists, 8036 cluster=cluster, 8037 identity=identity, 8038 option=option, 8039 partition=partition, 8040 ) 8041 8042 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8043 this = self._parse_ordered(self._parse_opclass) 8044 8045 if not self._match(TokenType.WITH): 8046 return this 8047 8048 op = self._parse_var(any_token=True) 8049 8050 return self.expression(exp.WithOperator, this=this, op=op) 8051 8052 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8053 self._match(TokenType.EQ) 8054 self._match(TokenType.L_PAREN) 8055 8056 opts: t.List[t.Optional[exp.Expression]] = [] 8057 option: exp.Expression | None 8058 while self._curr and not self._match(TokenType.R_PAREN): 8059 if self._match_text_seq("FORMAT_NAME", "="): 8060 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8061 option = self._parse_format_name() 8062 else: 8063 option = self._parse_property() 8064 8065 if option is None: 8066 self.raise_error("Unable to parse option") 8067 break 8068 8069 opts.append(option) 8070 8071 return opts 8072 8073 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8074 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8075 8076 options = [] 8077 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8078 option = self._parse_var(any_token=True) 8079 prev = self._prev.text.upper() 8080 8081 # Different dialects might separate options and values by white space, "=" and "AS" 8082 self._match(TokenType.EQ) 8083 self._match(TokenType.ALIAS) 8084 8085 param = self.expression(exp.CopyParameter, this=option) 8086 8087 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8088 TokenType.L_PAREN, advance=False 8089 ): 8090 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8091 param.set("expressions", self._parse_wrapped_options()) 8092 elif prev == "FILE_FORMAT": 8093 # T-SQL's external file format case 8094 param.set("expression", self._parse_field()) 8095 
else: 8096 param.set("expression", self._parse_unquoted_field()) 8097 8098 options.append(param) 8099 self._match(sep) 8100 8101 return options 8102 8103 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8104 expr = self.expression(exp.Credentials) 8105 8106 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8107 expr.set("storage", self._parse_field()) 8108 if self._match_text_seq("CREDENTIALS"): 8109 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8110 creds = ( 8111 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8112 ) 8113 expr.set("credentials", creds) 8114 if self._match_text_seq("ENCRYPTION"): 8115 expr.set("encryption", self._parse_wrapped_options()) 8116 if self._match_text_seq("IAM_ROLE"): 8117 expr.set("iam_role", self._parse_field()) 8118 if self._match_text_seq("REGION"): 8119 expr.set("region", self._parse_field()) 8120 8121 return expr 8122 8123 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8124 return self._parse_field() 8125 8126 def _parse_copy(self) -> exp.Copy | exp.Command: 8127 start = self._prev 8128 8129 self._match(TokenType.INTO) 8130 8131 this = ( 8132 self._parse_select(nested=True, parse_subquery_alias=False) 8133 if self._match(TokenType.L_PAREN, advance=False) 8134 else self._parse_table(schema=True) 8135 ) 8136 8137 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8138 8139 files = self._parse_csv(self._parse_file_location) 8140 credentials = self._parse_credentials() 8141 8142 self._match_text_seq("WITH") 8143 8144 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8145 8146 # Fallback case 8147 if self._curr: 8148 return self._parse_as_command(start) 8149 8150 return self.expression( 8151 exp.Copy, 8152 this=this, 8153 kind=kind, 8154 credentials=credentials, 8155 files=files, 8156 params=params, 8157 ) 8158 8159 def _parse_normalize(self) -> exp.Normalize: 8160 return self.expression( 8161 
exp.Normalize, 8162 this=self._parse_bitwise(), 8163 form=self._match(TokenType.COMMA) and self._parse_var(), 8164 ) 8165 8166 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8167 args = self._parse_csv(lambda: self._parse_lambda()) 8168 8169 this = seq_get(args, 0) 8170 decimals = seq_get(args, 1) 8171 8172 return expr_type( 8173 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8174 ) 8175 8176 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8177 if self._match_text_seq("COLUMNS", "(", advance=False): 8178 this = self._parse_function() 8179 if isinstance(this, exp.Columns): 8180 this.set("unpack", True) 8181 return this 8182 8183 return self.expression( 8184 exp.Star, 8185 **{ # type: ignore 8186 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8187 "replace": self._parse_star_op("REPLACE"), 8188 "rename": self._parse_star_op("RENAME"), 8189 }, 8190 ) 8191 8192 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8193 privilege_parts = [] 8194 8195 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8196 # (end of privilege list) or L_PAREN (start of column list) are met 8197 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8198 privilege_parts.append(self._curr.text.upper()) 8199 self._advance() 8200 8201 this = exp.var(" ".join(privilege_parts)) 8202 expressions = ( 8203 self._parse_wrapped_csv(self._parse_column) 8204 if self._match(TokenType.L_PAREN, advance=False) 8205 else None 8206 ) 8207 8208 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8209 8210 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8211 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8212 principal = self._parse_id_var() 8213 8214 if not principal: 8215 return None 8216 8217 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8218 8219 def 
_parse_grant(self) -> exp.Grant | exp.Command: 8220 start = self._prev 8221 8222 privileges = self._parse_csv(self._parse_grant_privilege) 8223 8224 self._match(TokenType.ON) 8225 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8226 8227 # Attempt to parse the securable e.g. MySQL allows names 8228 # such as "foo.*", "*.*" which are not easily parseable yet 8229 securable = self._try_parse(self._parse_table_parts) 8230 8231 if not securable or not self._match_text_seq("TO"): 8232 return self._parse_as_command(start) 8233 8234 principals = self._parse_csv(self._parse_grant_principal) 8235 8236 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8237 8238 if self._curr: 8239 return self._parse_as_command(start) 8240 8241 return self.expression( 8242 exp.Grant, 8243 privileges=privileges, 8244 kind=kind, 8245 securable=securable, 8246 principals=principals, 8247 grant_option=grant_option, 8248 ) 8249 8250 def _parse_overlay(self) -> exp.Overlay: 8251 return self.expression( 8252 exp.Overlay, 8253 **{ # type: ignore 8254 "this": self._parse_bitwise(), 8255 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8256 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8257 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8258 }, 8259 ) 8260 8261 def _parse_format_name(self) -> exp.Property: 8262 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8263 # for FILE_FORMAT = <format_name> 8264 return self.expression( 8265 exp.Property, 8266 this=exp.var("FORMAT_NAME"), 8267 value=self._parse_string() or self._parse_table_parts(), 8268 ) 8269 8270 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8271 args: t.List[exp.Expression] = [] 8272 8273 if self._match(TokenType.DISTINCT): 8274 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8275 self._match(TokenType.COMMA) 8276 8277 
args.extend(self._parse_csv(self._parse_assignment)) 8278 8279 return self.expression( 8280 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8281 ) 8282 8283 def _identifier_expression( 8284 self, token: t.Optional[Token] = None, **kwargs: t.Any 8285 ) -> exp.Identifier: 8286 token = token or self._prev 8287 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8288 expression.update_positions(token) 8289 return expression
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
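- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- dialect: The dialect to parse with; it is resolved through Dialect.get_or_raise and stored on the parser. Default: None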
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    """
    Store the parser's configuration and put the parser into its initial state.

    Args:
        error_level: The desired error level. ErrorLevel.IMMEDIATE is used when omitted.
        error_message_context: The amount of context (in number of characters) to capture
            from a query string when displaying an error message.
        max_errors: Maximum number of error messages to include in a raised ParseError.
        dialect: The dialect specification, resolved through Dialect.get_or_raise.
    """
    # Imported inside the function: at module level Dialect is only imported
    # under t.TYPE_CHECKING.
    from sqlglot.dialects import Dialect

    if not error_level:
        error_level = ErrorLevel.IMMEDIATE

    self.error_level = error_level
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Turn a list of tokens into syntax trees, producing one tree for each parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    # Looked up on the class, so the plain function (not a bound method) is handed to _parse.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees produced for the first expression type that parses
        successfully.

    Raises:
        TypeError: If no parser is registered for one of the expression types.
        ParseError: If the token list could not be parsed into any of the expression types.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Tag the failure with the type that was being attempted, then try the next one.
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # If the loop never ran there is no earlier failure to chain from; indexing
    # errors[-1] unconditionally would surface an IndexError instead of the ParseError.
    cause = errors[-1] if errors else None
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from cause
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees produced for the first expression type that parses successfully.
def check_errors(self) -> None:
    """
    Report the errors recorded so far in the way the configured error level asks for.

    With ErrorLevel.WARN every recorded error is logged. With ErrorLevel.RAISE a single
    ParseError combining the recorded errors is raised, provided there are any. For any
    other level nothing happens here.
    """
    level = self.error_level

    if level == ErrorLevel.WARN:
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    if level == ErrorLevel.RAISE and self.errors:
        # The message is capped at max_errors entries; the error list itself is passed in full.
        message = concat_messages(self.errors, self.max_errors)
        raise ParseError(message, errors=merge_errors(self.errors))
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Record an error, or raise it right away when the error level is ErrorLevel.IMMEDIATE.

    Args:
        message: The description of the problem.
        token: The token the error refers to. When omitted, the current token is used,
            then the previous one, then an empty string token.
    """
    token = token or self._curr or self._prev or Token.string("")
    context_size = self.error_message_context

    # + 1 so that the highlight slice below includes the character at token.end.
    start, end = token.start, token.end + 1
    start_context = self.sql[max(start - context_size, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + context_size]

    # The offending text is wrapped in the \033[4m ... \033[0m escape sequences.
    location = f"{message}. Line {token.line}, Col: {token.col}.\n"
    excerpt = f" {start_context}\033[4m{highlight}\033[0m{end_context}"

    error = ParseError.new(
        location + excerpt,
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level != ErrorLevel.IMMEDIATE:
        self.errors.append(error)
        return

    raise error
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Builds a new Expression and validates it.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression. When it is
            missing or empty, comment handling is delegated to `self._add_comments`.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The new expression, as returned by `validate_expression`.
    """
    node = exp_class(**kwargs)

    if comments:
        node.add_comments(comments)
    else:
        self._add_comments(node)

    return self.validate_expression(node)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Checks an Expression and reports every problem it finds through `raise_error`.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The same expression that was passed in.
    """
    # Validation is skipped entirely when errors are being ignored.
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for problem in expression.error_messages(args):
        self.raise_error(problem)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """
    Parses a set operation (UNION, EXCEPT or INTERSECT) whose left operand is `this`.

    Args:
        this: The expression on the left-hand side of the set operation.

    Returns:
        The set operation expression, or None if the upcoming tokens do not form a set
        operation. In the latter case the parser is rewound to where it started.
    """
    checkpoint = self._index

    # Optional side / kind modifiers may precede the set operator.
    _, side_token, kind_token = self._parse_join_parts()
    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        self._retreat(checkpoint)
        return None

    # The operator token that was just consumed decides the expression class;
    # anything that is neither UNION nor EXCEPT is treated as INTERSECT.
    operator = self._prev
    operation: t.Type[exp.SetOperation] = {
        TokenType.UNION: exp.Union,
        TokenType.EXCEPT: exp.Except,
    }.get(operator.token_type, exp.Intersect)
    comments = operator.comments

    distinct: t.Optional[bool]
    if self._match(TokenType.DISTINCT):
        distinct = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        # Neither keyword was given, so fall back to the dialect's default.
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
        "STRICT", "CORRESPONDING"
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        # A bare CORRESPONDING without side / kind modifiers is recorded as INNER.
        if not (side or kind):
            kind = "INNER"

    on_column_list = (
        self._parse_wrapped_csv(self._parse_column)
        if by_name and self._match_texts(("ON", "BY"))
        else None
    )

    right_operand = self._parse_select(nested=True, parse_set_operation=False)

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=right_operand,
        side=side,
        kind=kind,
        on=on_column_list,
    )